bioinform 0.1.17 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/LICENSE +0 -1
- data/README.md +1 -1
- data/TODO.txt +23 -30
- data/bin/convert_motif +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +0 -2
- data/lib/bioinform.rb +54 -16
- data/lib/bioinform/alphabet.rb +85 -0
- data/lib/bioinform/background.rb +90 -0
- data/lib/bioinform/cli.rb +1 -2
- data/lib/bioinform/cli/convert_motif.rb +52 -17
- data/lib/bioinform/cli/pcm2pwm.rb +32 -26
- data/lib/bioinform/cli/split_motifs.rb +31 -30
- data/lib/bioinform/conversion_algorithms.rb +6 -0
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
- data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
- data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
- data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
- data/lib/bioinform/data_models.rb +1 -7
- data/lib/bioinform/data_models/named_model.rb +38 -0
- data/lib/bioinform/data_models/pcm.rb +18 -28
- data/lib/bioinform/data_models/pm.rb +73 -170
- data/lib/bioinform/data_models/ppm.rb +11 -24
- data/lib/bioinform/data_models/pwm.rb +30 -56
- data/lib/bioinform/errors.rb +17 -0
- data/lib/bioinform/formatters.rb +4 -2
- data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
- data/lib/bioinform/formatters/motif_formatter.rb +69 -0
- data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
- data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
- data/lib/bioinform/parsers.rb +1 -8
- data/lib/bioinform/parsers/matrix_parser.rb +44 -36
- data/lib/bioinform/parsers/motif_splitter.rb +45 -0
- data/lib/bioinform/support.rb +46 -14
- data/lib/bioinform/support/strip_doc.rb +1 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/alphabet_spec.rb +79 -0
- data/spec/background_spec.rb +57 -0
- data/spec/cli/cli_spec.rb +6 -6
- data/spec/cli/convert_motif_spec.rb +88 -88
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
- data/spec/cli/pcm2pwm_spec.rb +22 -23
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +6 -21
- data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
- data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
- data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
- data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
- data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
- data/spec/data_models/named_model_spec.rb +41 -0
- data/spec/data_models/pcm_spec.rb +114 -45
- data/spec/data_models/pm_spec.rb +132 -333
- data/spec/data_models/ppm_spec.rb +47 -44
- data/spec/data_models/pwm_spec.rb +85 -77
- data/spec/fabricators/motif_formats_fabricator.rb +116 -116
- data/spec/formatters/consensus_formatter_spec.rb +26 -0
- data/spec/formatters/raw_formatter_spec.rb +169 -0
- data/spec/parsers/matrix_parser_spec.rb +216 -0
- data/spec/parsers/motif_splitter_spec.rb +87 -0
- data/spec/spec_helper.rb +2 -2
- data/spec/spec_helper_source.rb +25 -5
- data/spec/support_spec.rb +31 -0
- metadata +43 -124
- data/bin/merge_into_collection +0 -4
- data/lib/bioinform/cli/merge_into_collection.rb +0 -80
- data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
- data/lib/bioinform/data_models/collection.rb +0 -75
- data/lib/bioinform/data_models/motif.rb +0 -56
- data/lib/bioinform/formatters/raw_formatter.rb +0 -41
- data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
- data/lib/bioinform/parsers/parser.rb +0 -92
- data/lib/bioinform/parsers/splittable_parser.rb +0 -57
- data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
- data/lib/bioinform/parsers/string_parser.rb +0 -72
- data/lib/bioinform/parsers/trivial_parser.rb +0 -34
- data/lib/bioinform/parsers/yaml_parser.rb +0 -35
- data/lib/bioinform/support/advanced_scan.rb +0 -8
- data/lib/bioinform/support/array_product.rb +0 -6
- data/lib/bioinform/support/array_zip.rb +0 -6
- data/lib/bioinform/support/collect_hash.rb +0 -7
- data/lib/bioinform/support/deep_dup.rb +0 -5
- data/lib/bioinform/support/delete_many.rb +0 -14
- data/lib/bioinform/support/inverf.rb +0 -13
- data/lib/bioinform/support/multiline_squish.rb +0 -6
- data/lib/bioinform/support/parameters.rb +0 -28
- data/lib/bioinform/support/partial_sums.rb +0 -16
- data/lib/bioinform/support/same_by.rb +0 -12
- data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
- data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
- data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
- data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
- data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
- data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
- data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
- data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
- data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
- data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
- data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
- data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
- data/spec/cli/data/split_motifs/collection.yaml +0 -188
- data/spec/cli/merge_into_collection_spec.rb +0 -100
- data/spec/data_models/collection_spec.rb +0 -98
- data/spec/data_models/motif_spec.rb +0 -224
- data/spec/fabricators/collection_fabricator.rb +0 -8
- data/spec/fabricators/motif_fabricator.rb +0 -33
- data/spec/fabricators/pcm_fabricator.rb +0 -25
- data/spec/fabricators/pm_fabricator.rb +0 -52
- data/spec/fabricators/ppm_fabricator.rb +0 -14
- data/spec/fabricators/pwm_fabricator.rb +0 -16
- data/spec/parsers/parser_spec.rb +0 -152
- data/spec/parsers/string_fantom_parser_spec.rb +0 -70
- data/spec/parsers/string_parser_spec.rb +0 -77
- data/spec/parsers/trivial_parser_spec.rb +0 -64
- data/spec/parsers/yaml_parser_spec.rb +0 -50
- data/spec/support/advanced_scan_spec.rb +0 -32
- data/spec/support/array_product_spec.rb +0 -15
- data/spec/support/array_zip_spec.rb +0 -15
- data/spec/support/collect_hash_spec.rb +0 -15
- data/spec/support/delete_many_spec.rb +0 -44
- data/spec/support/inverf_spec.rb +0 -19
- data/spec/support/multiline_squish_spec.rb +0 -25
- data/spec/support/partial_sums_spec.rb +0 -30
- data/spec/support/same_by_spec.rb +0 -36
@@ -0,0 +1,26 @@ data/spec/formatters/consensus_formatter_spec.rb
|
|
1
|
+
require 'bioinform/data_models/pm'
|
2
|
+
require 'bioinform/formatters/consensus_formatter'
|
3
|
+
|
4
|
+
describe Bioinform::ConsensusFormatter do
|
5
|
+
let(:pm) { Bioinform::MotifModel::PM.new([[10,30,10,28], [30,16,16,16], [12,30,10,26], [26,27,27,1]]) }
|
6
|
+
|
7
|
+
specify('.new without a block raises error') { expect{ Bioinform::ConsensusFormatter.new }.to raise_error Bioinform::Error }
|
8
|
+
|
9
|
+
context 'custom formatter' do
|
10
|
+
let(:formatter){ Bioinform::ConsensusFormatter.new{|pos, el, ind| (pos.max - el) < pos.max * 0.1 } }
|
11
|
+
specify{ expect(formatter.format_string(pm)).to eq 'YACV' }
|
12
|
+
end
|
13
|
+
|
14
|
+
context 'standard formatter' do
|
15
|
+
let(:formatter){ Bioinform::ConsensusFormatter.by_maximal_elements }
|
16
|
+
specify{ expect(formatter.format_string(pm)).to eq 'CACS' }
|
17
|
+
end
|
18
|
+
|
19
|
+
specify do
|
20
|
+
expect{|b|
|
21
|
+
Bioinform::ConsensusFormatter.new(&b).format_string(pm)
|
22
|
+
}.to yield_successive_args( *([ [[10,30,10,28],10,0], # col,el,ind
|
23
|
+
[[10,30,10,28],30,1] ] + # col,el,ind
|
24
|
+
[Array]*14 ) ) # rest triples
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,169 @@ data/spec/formatters/raw_formatter_spec.rb
|
|
1
|
+
require 'bioinform/formatters/motif_formatter'
|
2
|
+
require 'bioinform/data_models/pm'
|
3
|
+
|
4
|
+
describe Bioinform::MotifFormatter do
|
5
|
+
let(:matrix) { [[1,2.345,6.7,8.99],
|
6
|
+
[10,11.123,-15.678,16]] }
|
7
|
+
let(:motif) { Bioinform::MotifModel::PM.new(matrix) }
|
8
|
+
let(:default_matrix_string) { "1 2.345 6.7 8.99\n"+
|
9
|
+
"10 11.123 -15.678 16" }
|
10
|
+
|
11
|
+
context 'with default configuration' do
|
12
|
+
let(:formatter) { Bioinform::MotifFormatter.new }
|
13
|
+
specify { expect(formatter.with_name).to eq :auto }
|
14
|
+
specify { expect(formatter.nucleotides_in).to eq :columns }
|
15
|
+
specify { expect(formatter.precision).to be_falsy }
|
16
|
+
specify { expect(formatter.with_nucleotide_header).to eq false }
|
17
|
+
specify { expect(formatter.with_position_header).to eq false }
|
18
|
+
end
|
19
|
+
|
20
|
+
context 'with with_name equal to false' do
|
21
|
+
let(:formatter) { Bioinform::MotifFormatter.new(with_name: false) }
|
22
|
+
specify { expect( formatter.format(motif) ).to eq default_matrix_string }
|
23
|
+
specify { expect( formatter.format(motif.named('Stub name')) ).to eq default_matrix_string }
|
24
|
+
end
|
25
|
+
context 'with with_name equal to true' do
|
26
|
+
let(:formatter) { Bioinform::MotifFormatter.new(with_name: true) }
|
27
|
+
specify { expect{ formatter.format(motif) }.to raise_error Bioinform::Error }
|
28
|
+
specify { expect( formatter.format(motif.named('')) ).to eq ">\n" +
|
29
|
+
default_matrix_string }
|
30
|
+
specify { expect( formatter.format(motif.named('Stub name')) ).to eq ">Stub name\n" +
|
31
|
+
default_matrix_string }
|
32
|
+
end
|
33
|
+
context 'with with_name equal to :auto' do
|
34
|
+
let(:formatter) { Bioinform::MotifFormatter.new(with_name: :auto) }
|
35
|
+
specify { expect( formatter.format(motif) ).to eq default_matrix_string }
|
36
|
+
specify { expect( formatter.format(motif.named('')) ).to eq default_matrix_string }
|
37
|
+
specify { expect( formatter.format(motif.named('Stub name')) ).to eq ">Stub name\n" +
|
38
|
+
default_matrix_string }
|
39
|
+
end
|
40
|
+
context 'with with_name value different from true/false/:auto' do
|
41
|
+
specify{ expect { Bioinform::MotifFormatter.new(with_name: :somewhat) }.to raise_error Bioinform::Error }
|
42
|
+
end
|
43
|
+
|
44
|
+
context 'with nucleotides_in :columns' do
|
45
|
+
let(:formatter) { Bioinform::MotifFormatter.new(nucleotides_in: :columns) }
|
46
|
+
specify { expect( formatter.format(motif) ).to eq "1 2.345 6.7 8.99\n" +
|
47
|
+
"10 11.123 -15.678 16" }
|
48
|
+
end
|
49
|
+
context 'with nucleotides_in :rows' do
|
50
|
+
let(:formatter) { Bioinform::MotifFormatter.new(nucleotides_in: :rows) }
|
51
|
+
specify { expect( formatter.format(motif) ).to eq "1 10\n" +
|
52
|
+
"2.345 11.123\n" +
|
53
|
+
"6.7 -15.678\n" +
|
54
|
+
"8.99 16" }
|
55
|
+
end
|
56
|
+
context 'with nucleotides_in not equal to :rows or :columns' do
|
57
|
+
specify { expect{ Bioinform::MotifFormatter.new(nucleotides_in: :somewhat) }.to raise_error(Bioinform::Error) }
|
58
|
+
end
|
59
|
+
|
60
|
+
context 'with precision equal to false' do
|
61
|
+
let(:formatter) { Bioinform::MotifFormatter.new(precision: false) }
|
62
|
+
specify { expect( formatter.format(motif) ).to eq "1 2.345 6.7 8.99\n" +
|
63
|
+
"10 11.123 -15.678 16" }
|
64
|
+
end
|
65
|
+
context 'with precision equal to a number' do
|
66
|
+
let(:formatter) { Bioinform::MotifFormatter.new(precision: 3) }
|
67
|
+
specify { expect( formatter.format(motif) ).to eq "1 2.35 6.7 8.99\n" +
|
68
|
+
"10 11.1 -15.7 16" }
|
69
|
+
end
|
70
|
+
|
71
|
+
context 'with nucleotide header' do
|
72
|
+
context 'with nucleotides in columns' do
|
73
|
+
let(:formatter) { Bioinform::MotifFormatter.new(with_nucleotide_header: true, nucleotides_in: :columns) }
|
74
|
+
specify { expect( formatter.format(motif) ).to eq "A C G T\n" +
|
75
|
+
"1 2.345 6.7 8.99\n" +
|
76
|
+
"10 11.123 -15.678 16" }
|
77
|
+
|
78
|
+
end
|
79
|
+
context 'with nucleotides in rows' do
|
80
|
+
let(:formatter) { Bioinform::MotifFormatter.new(with_nucleotide_header: true, nucleotides_in: :rows) }
|
81
|
+
specify { expect( formatter.format(motif) ).to eq "A 1 10\n" +
|
82
|
+
"C 2.345 11.123\n" +
|
83
|
+
"G 6.7 -15.678\n" +
|
84
|
+
"T 8.99 16" }
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
context 'with position header' do
|
89
|
+
context 'with nucleotides in columns' do
|
90
|
+
let(:formatter) { Bioinform::MotifFormatter.new(with_position_header: true, nucleotides_in: :columns) }
|
91
|
+
let(:long_motif) { Bioinform::MotifModel::PM.new([[1,2,3,4]] * 12) }
|
92
|
+
specify { expect( formatter.format(motif) ).to eq "01 1 2.345 6.7 8.99\n" +
|
93
|
+
"02 10 11.123 -15.678 16" }
|
94
|
+
specify { expect( formatter.format(long_motif) ).to eq "01 1 2 3 4\n" +
|
95
|
+
"02 1 2 3 4\n" +
|
96
|
+
"03 1 2 3 4\n" +
|
97
|
+
"04 1 2 3 4\n" +
|
98
|
+
"05 1 2 3 4\n" +
|
99
|
+
"06 1 2 3 4\n" +
|
100
|
+
"07 1 2 3 4\n" +
|
101
|
+
"08 1 2 3 4\n" +
|
102
|
+
"09 1 2 3 4\n" +
|
103
|
+
"10 1 2 3 4\n" +
|
104
|
+
"11 1 2 3 4\n" +
|
105
|
+
"12 1 2 3 4" }
|
106
|
+
end
|
107
|
+
context 'with nucleotides in rows' do
|
108
|
+
let(:formatter) { Bioinform::MotifFormatter.new(with_position_header: true, nucleotides_in: :rows) }
|
109
|
+
specify { expect( formatter.format(motif) ).to eq "01 02\n" +
|
110
|
+
"1 10\n" +
|
111
|
+
"2.345 11.123\n" +
|
112
|
+
"6.7 -15.678\n" +
|
113
|
+
"8.99 16" }
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
context 'with both headers' do
|
118
|
+
context 'with nucleotides in columns' do
|
119
|
+
let(:formatter) { Bioinform::MotifFormatter.new(with_position_header: true, with_nucleotide_header: true, nucleotides_in: :columns) }
|
120
|
+
specify { expect( formatter.format(motif) ).to eq " A C G T\n" +
|
121
|
+
"01 1 2.345 6.7 8.99\n" +
|
122
|
+
"02 10 11.123 -15.678 16" }
|
123
|
+
end
|
124
|
+
context 'with nucleotides in rows' do
|
125
|
+
let(:formatter) { Bioinform::MotifFormatter.new(with_position_header: true, with_nucleotide_header: true, nucleotides_in: :rows) }
|
126
|
+
specify { expect( formatter.format(motif) ).to eq " 01 02\n" +
|
127
|
+
"A 1 10\n" +
|
128
|
+
"C 2.345 11.123\n" +
|
129
|
+
"G 6.7 -15.678\n" +
|
130
|
+
"T 8.99 16" }
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
context 'on different alphabet' do
|
135
|
+
let(:matrix_15) { [[1,2,3,1.567, 12,-11,12,0,-1.1,0.6, 0.4,0.321,0.11,-1.23, 2.0],
|
136
|
+
[0,0,0,0, 0,0,0,0,0,0, 0,0,0,0, 0]] }
|
137
|
+
let(:motif) { Bioinform::MotifModel::PM.new(matrix_15, alphabet: Bioinform::IUPACAlphabet) }
|
138
|
+
|
139
|
+
specify {
|
140
|
+
expect( Bioinform::MotifFormatter.new.format(motif) )
|
141
|
+
.to eq "1 2 3 1.567 12 -11 12 0 -1.1 0.6 0.4 0.321 0.11 -1.23 2.0\n" +
|
142
|
+
"0 0 0 0 0 0 0 0 0 0 0 0 0 0 0"
|
143
|
+
}
|
144
|
+
specify {
|
145
|
+
expect( Bioinform::MotifFormatter.new(with_nucleotide_header: true).format(motif) )
|
146
|
+
.to eq "A C G T M R W S Y K V H D B N\n" +
|
147
|
+
"1 2 3 1.567 12 -11 12 0 -1.1 0.6 0.4 0.321 0.11 -1.23 2.0\n" +
|
148
|
+
"0 0 0 0 0 0 0 0 0 0 0 0 0 0 0"
|
149
|
+
}
|
150
|
+
specify {
|
151
|
+
expect( Bioinform::MotifFormatter.new(with_nucleotide_header: true, nucleotides_in: :rows).format(motif) )
|
152
|
+
.to eq "A 1 0\n" +
|
153
|
+
"C 2 0\n" +
|
154
|
+
"G 3 0\n" +
|
155
|
+
"T 1.567 0\n" +
|
156
|
+
"M 12 0\n" +
|
157
|
+
"R -11 0\n" +
|
158
|
+
"W 12 0\n" +
|
159
|
+
"S 0 0\n" +
|
160
|
+
"Y -1.1 0\n" +
|
161
|
+
"K 0.6 0\n" +
|
162
|
+
"V 0.4 0\n" +
|
163
|
+
"H 0.321 0\n" +
|
164
|
+
"D 0.11 0\n" +
|
165
|
+
"B -1.23 0\n" +
|
166
|
+
"N 2.0 0"
|
167
|
+
}
|
168
|
+
end
|
169
|
+
end
|
@@ -0,0 +1,216 @@ data/spec/parsers/matrix_parser_spec.rb
|
|
1
|
+
require 'bioinform/parsers/matrix_parser'
|
2
|
+
|
3
|
+
describe Bioinform::MatrixParser do
|
4
|
+
specify { expect{ Bioinform::MatrixParser.new(nucleotides_in: :somewhat) }.to raise_error Bioinform::Error }
|
5
|
+
|
6
|
+
context 'with default options' do
|
7
|
+
subject(:parser) { Bioinform::MatrixParser.new }
|
8
|
+
specify { expect(parser.has_name).to eq :auto }
|
9
|
+
specify { expect(parser.has_header_row).to eq false }
|
10
|
+
specify { expect(parser.has_header_column).to eq false }
|
11
|
+
specify { expect(parser.nucleotides_in).to eq :auto }
|
12
|
+
specify { expect(parser.fix_nucleotides_number).to eq 4 }
|
13
|
+
|
14
|
+
specify { expect(parser.name_pattern).to match ">Motif_name" }
|
15
|
+
specify { expect(parser.name_pattern).to match ">Motif name" }
|
16
|
+
specify { expect(parser.name_pattern).to match "> Motif name" }
|
17
|
+
specify { expect(parser.name_pattern).to match "Motif name" }
|
18
|
+
specify { expect(parser.name_pattern).to match "Motif name\tother info" }
|
19
|
+
|
20
|
+
specify { expect(parser.name_pattern.match(">Motif_name")[:name]).to eq "Motif_name" }
|
21
|
+
specify { expect(parser.name_pattern.match(">Motif name")[:name]).to eq "Motif name" }
|
22
|
+
specify { expect(parser.name_pattern.match("> Motif name")[:name]).to eq "Motif name" }
|
23
|
+
specify { expect(parser.name_pattern.match("Motif name")[:name]).to eq "Motif name" }
|
24
|
+
specify { expect(parser.name_pattern.match("Motif name\tother info")[:name]).to eq "Motif name" }
|
25
|
+
end
|
26
|
+
|
27
|
+
context 'parser having name' do
|
28
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns,has_name: true) }
|
29
|
+
let(:input) {">PM name\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
30
|
+
specify { expect( parser.parse!(input) ).to eq( {name: "PM name", matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
31
|
+
|
32
|
+
specify 'trims empty lines' do
|
33
|
+
expect( parser.parse!("\n \t \n" + input + "\n\n") ).to eq( {name: "PM name", matrix: [[1,2,3,4],[11,12,13,14]]} )
|
34
|
+
end
|
35
|
+
end
|
36
|
+
context 'parser having neither name nor header' do
|
37
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_name: false) }
|
38
|
+
let(:input_allowed) {"1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
39
|
+
let(:input_not_allowed) {">PM Name\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
40
|
+
let(:input_not_allowed_2) {"A\tC\tG\tT\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
41
|
+
let(:input_not_allowed_3) {"##01\t1\t2\t3\t4\n" + "##02\t11\t12\t13\t14" }
|
42
|
+
specify { expect( parser.parse!(input_allowed) ).to eq( {name: nil, matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
43
|
+
specify { expect{ parser.parse!(input_not_allowed) }.to raise_error Bioinform::Error }
|
44
|
+
specify { expect{ parser.parse!(input_not_allowed_2) }.to raise_error Bioinform::Error }
|
45
|
+
specify { expect{ parser.parse!(input_not_allowed_3) }.to raise_error Bioinform::Error }
|
46
|
+
end
|
47
|
+
context 'with has_name equal to :auto parser can either have name or not' do
|
48
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_name: :auto) }
|
49
|
+
let(:input_without_name) {"1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
50
|
+
let(:input_with_name) {">PM Name\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
51
|
+
let(:input_with_bad_name) {"-Name\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
52
|
+
specify { expect( parser.parse!(input_without_name) ).to eq( {name: nil, matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
53
|
+
specify { expect( parser.parse!(input_with_name) ).to eq( {name: 'PM Name', matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
54
|
+
specify { expect{ parser.parse!(input_with_bad_name) }.to raise_error Bioinform::Error }
|
55
|
+
end
|
56
|
+
context 'parser having name and header row' do
|
57
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_name: true, has_header_row: true) }
|
58
|
+
let(:input) {">PM name\n" + "A\tC\tG\tT\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
59
|
+
specify { expect( parser.parse!(input) ).to eq( {name: "PM name", matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
60
|
+
end
|
61
|
+
context 'parser having header row' do
|
62
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_name: false, has_header_row: true) }
|
63
|
+
let(:input) {"A\tC\tG\tT\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
64
|
+
specify { expect( parser.parse!(input) ).to eq( {name: nil, matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
65
|
+
specify { expect{ parser.parse!("Motif name\n" + input) }.to raise_error Bioinform::Error }
|
66
|
+
end
|
67
|
+
context 'parser having header column' do
|
68
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_header_column: true) }
|
69
|
+
let(:input) {">PM name\n" + "##01\t1\t2\t3\t4\n" + "##02\t11\t12\t13\t14" }
|
70
|
+
specify { expect( parser.parse!(input) ).to eq( {name: "PM name", matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
71
|
+
end
|
72
|
+
context 'parser having both headers' do
|
73
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :columns, has_header_row: true, has_header_column: true) }
|
74
|
+
let(:input) {">PM name\n" + "X\tA\tC\tG\tT\n" + "##01\t1\t2\t3\t4\n" + "##02\t11\t12\t13\t14" }
|
75
|
+
specify { expect( parser.parse!(input) ).to eq( {name: "PM name", matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
76
|
+
end
|
77
|
+
|
78
|
+
context 'parser for transposed matrix' do
|
79
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :rows) }
|
80
|
+
let(:input) {">PM name\n" + "1\t11\n" + "2\t12\n" + "3\t13\n" + "4\t14" }
|
81
|
+
specify { expect( parser.parse!(input) ).to eq( {name: 'PM name', matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
82
|
+
end
|
83
|
+
context 'parser for transposed matrix with row header' do
|
84
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :rows, has_header_row: true) }
|
85
|
+
let(:input) {">PM name\n" + "##01\t##02\n" + "1\t11\n" + "2\t12\n" + "3\t13\n" + "4\t14" }
|
86
|
+
specify { expect( parser.parse!(input) ).to eq( {name: "PM name", matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
87
|
+
end
|
88
|
+
context 'parser for transposed matrix with column header' do
|
89
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :rows, has_header_column: true) }
|
90
|
+
let(:input) {">PM name\n" + "A\t1\t11\n" + "C\t2\t12\n" + "G\t3\t13\n" + "T\t4\t14" }
|
91
|
+
specify { expect( parser.parse!(input) ).to eq( {name: "PM name", matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
92
|
+
end
|
93
|
+
context 'parser for transposed matrix with both header' do
|
94
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :rows, has_header_column: true, has_header_row: true) }
|
95
|
+
let(:input) {">PM name\n" + "X\t##01\t##02\n" + "A\t1\t11\n" + "C\t2\t12\n" + "G\t3\t13\n" + "T\t4\t14" }
|
96
|
+
specify { expect( parser.parse!(input) ).to eq( {name: "PM name", matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
97
|
+
end
|
98
|
+
|
99
|
+
context 'parser having custom name pattern' do
|
100
|
+
subject(:parser) { Bioinform::MatrixParser.new(has_name: true, name_pattern: /^NA>(?<name>.+)$/) }
|
101
|
+
let(:input_allowed) {"NA>Motif name\tother info\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
102
|
+
let(:input_not_allowed) {"Motif name\tother info\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
103
|
+
specify { expect( parser.parse!(input_allowed) ).to eq( {name: "Motif name\tother info", matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
104
|
+
specify { expect{ parser.parse!(input_not_allowed) }.to raise_error Bioinform::Error }
|
105
|
+
end
|
106
|
+
|
107
|
+
context 'parser reducing number of nucleotides' do
|
108
|
+
subject(:parser) { Bioinform::MatrixParser.new(has_name: true) }
|
109
|
+
let(:input) {">PM name\n" + "1\t2\t3\t4\t5\n" + "11\t12\t13\t14\t15" }
|
110
|
+
specify { expect( parser.parse!(input) ).to eq( {name: "PM name", matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
111
|
+
end
|
112
|
+
context 'parser for transposed matrix reducing number of nucleotides' do
|
113
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :rows) }
|
114
|
+
let(:input) {">PM name\n" + "1\t11\n" + "2\t12\n" + "3\t13\n" + "4\t14\n" + "5\t15"}
|
115
|
+
specify { expect( parser.parse!(input) ).to eq( {name: 'PM name', matrix: [[1,2,3,4],[11,12,13,14]]} ) }
|
116
|
+
end
|
117
|
+
context 'parser not reducing number of nucleotides' do
|
118
|
+
subject(:parser) { Bioinform::MatrixParser.new(has_name: true, fix_nucleotides_number: false) }
|
119
|
+
let(:input) {">PM name\n" + "1\t2\t3\t4\t5\n" + "11\t12\t13\t14\t15" }
|
120
|
+
specify { expect( parser.parse!(input) ).to eq( {name: "PM name", matrix: [[1,2,3,4,5],[11,12,13,14,15]]} ) }
|
121
|
+
end
|
122
|
+
context 'parser reducing number of nucleotides to a non-standard one' do
|
123
|
+
subject(:parser) { Bioinform::MatrixParser.new(has_name: true, fix_nucleotides_number: 3) }
|
124
|
+
let(:input) {">PM name\n" + "1\t2\t3\t4\t5\n" + "11\t12\t13\t14\t15" }
|
125
|
+
specify { expect( parser.parse!(input) ).to eq( {name: "PM name", matrix: [[1,2,3],[11,12,13]]} ) }
|
126
|
+
end
|
127
|
+
context 'parser which hasn\'t enough number of nucleotides' do
|
128
|
+
subject(:parser) { Bioinform::MatrixParser.new(has_name: true, fix_nucleotides_number: 4) }
|
129
|
+
let(:input) {">PM name\n" + "1\t2\t3\n" + "11\t12\t13" }
|
130
|
+
specify { expect{ parser.parse!(input) }.to raise_error Bioinform::Error }
|
131
|
+
end
|
132
|
+
|
133
|
+
context 'parser with auto transposition' do
|
134
|
+
let(:input_not_transposed) {">PM Name\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14" }
|
135
|
+
let(:input_transposed) {">PM Name\n" + "1\t11\n" + "2\t12\n" + "3\t13\n" + "4\t14"}
|
136
|
+
let(:input_4x4) {">PM Name\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14\n" + "1\t2\t3\t4\n" + "11\t12\t13\t14"}
|
137
|
+
context 'with fixed nucleotides number' do
|
138
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :auto, fix_nucleotides_number: 4) }
|
139
|
+
specify { expect(parser.parse(input_not_transposed)).to eq({name:'PM Name', matrix: [[1,2,3,4],[11,12,13,14]]}) }
|
140
|
+
specify { expect(parser.parse(input_transposed)).to eq({name:'PM Name', matrix: [[1,2,3,4],[11,12,13,14]]}) }
|
141
|
+
specify { expect(parser.parse(input_4x4)).to eq({name:'PM Name', matrix: [[1,2,3,4],[11,12,13,14],[1,2,3,4],[11,12,13,14]]}) }
|
142
|
+
end
|
143
|
+
context 'with non fixed nucleotides number' do
|
144
|
+
subject(:parser) { Bioinform::MatrixParser.new(nucleotides_in: :auto, fix_nucleotides_number: false) }
|
145
|
+
specify { expect(parser.parse(input_not_transposed)).to eq({name:'PM Name', matrix: [[1,2,3,4],[11,12,13,14]]}) }
|
146
|
+
specify { expect(parser.parse(input_transposed)).to eq({name:'PM Name', matrix: [[1,11],[2,12],[3,13],[4,14]]}) }
|
147
|
+
specify { expect(parser.parse(input_4x4)).to eq({name:'PM Name', matrix: [[1,2,3,4],[11,12,13,14],[1,2,3,4],[11,12,13,14]]}) }
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
context 'FANTOM-formatted motifs' do
|
152
|
+
let(:parser) do
|
153
|
+
Bioinform::MatrixParser.new( has_name: true, name_pattern: /^NA\s+(?<name>.+)$/,
|
154
|
+
has_header_row: true, has_header_column: true, nucleotides_in: :columns,
|
155
|
+
fix_nucleotides_number: 4 )
|
156
|
+
end
|
157
|
+
|
158
|
+
specify 'parse strings in FANTOM format' do
|
159
|
+
input = "NA PM_name\n" +
|
160
|
+
"P0 A C G T\n" +
|
161
|
+
"P1 1 2 3 4\n" +
|
162
|
+
"P2 5 6 7 8"
|
163
|
+
expect(parser.parse(input)).to eq({matrix: [[1,2,3,4],[5,6,7,8]], name: 'PM_name'})
|
164
|
+
end
|
165
|
+
|
166
|
+
|
167
|
+
specify 'ignores additional columns' do
|
168
|
+
input = "NA PM_name\n" +
|
169
|
+
"P0 A C G T S P\n" +
|
170
|
+
"P1 1 2 3 4 5 10\n" +
|
171
|
+
"P2 5 6 7 8 5 11"
|
172
|
+
expect(parser.parse(input)).to eq({matrix: [[1,2,3,4],[5,6,7,8]], name: 'PM_name'})
|
173
|
+
end
|
174
|
+
|
175
|
+
specify 'parses string with more than 10 positions(2-digit row numbers)' do
|
176
|
+
input = "NA PM_name\n" +
|
177
|
+
"P0 A C G T\n" +
|
178
|
+
"P1 1 2 3 4\n" +
|
179
|
+
"P2 5 6 7 8\n" +
|
180
|
+
"P3 1 2 3 4\n" +
|
181
|
+
"P4 5 6 7 8\n" +
|
182
|
+
"P5 1 2 3 4\n" +
|
183
|
+
"P6 5 6 7 8\n" +
|
184
|
+
"P7 1 2 3 4\n" +
|
185
|
+
"P8 5 6 7 8\n" +
|
186
|
+
"P9 1 2 3 4\n" +
|
187
|
+
"P10 5 6 7 8\n" +
|
188
|
+
"P11 1 2 3 4\n" +
|
189
|
+
"P12 5 6 7 8"
|
190
|
+
expect(parser.parse(input)).to eq({matrix: [[1,2,3,4],[5,6,7,8]]*6, name: 'PM_name'})
|
191
|
+
end
|
192
|
+
|
193
|
+
good_cases = {
|
194
|
+
'Nx4 string' => {input: "1 2 3 4\n5 6 7 8", result: {matrix: [[1,2,3,4],[5,6,7,8]], name: nil} },
|
195
|
+
'4xN string' => {input: "1 5\n2 6\n3 7\n 4 8", result: {matrix: [[1,2,3,4],[5,6,7,8]], name: nil} },
|
196
|
+
'string with name' => {input: "PM_name\n1 5\n2 6\n3 7\n 4 8", result: {matrix: [[1,2,3,4],[5,6,7,8]], name: 'PM_name'} },
|
197
|
+
'string with name (with introduction sign)' => {input: ">\t PM_name\n1 5\n2 6\n3 7\n 4 8", result: {matrix: [[1,2,3,4],[5,6,7,8]], name: 'PM_name'} },
|
198
|
+
'string with name (with special characters)' => {input: "Testmatrix_first:subname+sub-subname\n1 5\n2 6\n3 7\n 4 8",
|
199
|
+
result: {matrix: [[1,2,3,4],[5,6,7,8]], name: 'Testmatrix_first:subname+sub-subname'} },
|
200
|
+
'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210", result: {matrix: [[1.23, 4.56, 7.8, 9.0],[9, -8.7, 6.54, -3210]], name: nil} },
|
201
|
+
'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3", result: {matrix: [[1.23, 4.56, 7.8, 9.0],[9, -8.7, 6.54, -3210]], name: nil} },
|
202
|
+
'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8", result: {matrix: [[1,2,3,4],[5,6,7,8]], name: nil} },
|
203
|
+
'string with preceding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n", result: {matrix: [[1,2,3,4],[5,6,7,8]], name: nil} },
|
204
|
+
'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8", result: {matrix: [[1,2,3,4],[5,6,7,8]], name: nil} },
|
205
|
+
}
|
206
|
+
|
207
|
+
bad_cases = {
|
208
|
+
'string with non-numeric input' => {input: "1.23 4.56 78aaa 9.0\n9 -8.7 6.54 -3210" },
|
209
|
+
'string with non-numeric input at the end of line' => {input: "1.23 4.56 78 9.0aaa\n9 -8.7 6.54 -3210" },
|
210
|
+
'string with non-numeric input at a separate line' => {input: "1.23 4.56 78 9.0\naaa\n9 -8.7 6.54 -3210" },
|
211
|
+
'string with empty exponent sign' => {input: "1.23 4.56 7.8 9.0\n 9e -8.7 6.54 3210" }
|
212
|
+
}
|
213
|
+
|
214
|
+
parser_specs(Bioinform::MatrixParser.new, good_cases, bad_cases)
|
215
|
+
end
|
216
|
+
end
|
@@ -0,0 +1,87 @@ data/spec/parsers/motif_splitter_spec.rb
|
|
1
|
+
require 'bioinform/parsers/motif_splitter'
|
2
|
+
|
3
|
+
describe Bioinform::MotifSplitter do
|
4
|
+
let(:motif_unnamed) { "1 2 3 4\n"+"5\t6\t7\t8" }
|
5
|
+
let(:motif_with_floats) { "motif2\n 1.0 1.1 1.2 1.3\n 14 15 16 17\n 19 20 21 22" }
|
6
|
+
let(:motif_with_signs_and_exponents) { "> motif3\n-2.0 1.3e-3 -5.47 5.2\n+3.4 7 3 3" }
|
7
|
+
|
8
|
+
context 'default splitter' do
|
9
|
+
let(:motif_splitter) { Bioinform::MotifSplitter.new }
|
10
|
+
|
11
|
+
specify do
|
12
|
+
input = motif_unnamed + "\n" + motif_with_floats + "\n" + motif_with_signs_and_exponents
|
13
|
+
expect(motif_splitter.split(input)).to eq [motif_unnamed, motif_with_floats, motif_with_signs_and_exponents]
|
14
|
+
end
|
15
|
+
|
16
|
+
specify do
|
17
|
+
input = motif_unnamed + "\n" + motif_with_floats + "\n" + motif_with_signs_and_exponents + "\n"
|
18
|
+
expect(motif_splitter.split(input)).to eq [motif_unnamed, motif_with_floats, motif_with_signs_and_exponents]
|
19
|
+
end
|
20
|
+
|
21
|
+
specify do
|
22
|
+
input = "Motif1 name\n" + motif_unnamed + "\n" + motif_with_floats + "\n" + motif_with_signs_and_exponents
|
23
|
+
expect(motif_splitter.split(input)).to eq ["Motif1 name\n" + motif_unnamed, motif_with_floats, motif_with_signs_and_exponents]
|
24
|
+
end
|
25
|
+
|
26
|
+
specify { expect(motif_splitter.split(motif_unnamed + "\n\n" + motif_unnamed)).to eq [motif_unnamed, motif_unnamed] }
|
27
|
+
|
28
|
+
specify { expect(motif_splitter.split(motif_unnamed + "\n\n\n" + motif_unnamed)).to eq [motif_unnamed, motif_unnamed] }
|
29
|
+
end
|
30
|
+
|
31
|
+
context 'with specified pattern' do
|
32
|
+
let(:motif_splitter) { Bioinform::MotifSplitter.new(start_motif_pattern: /^NA\s+\w+$/, splitter_pattern: /^\/\/\s$/) }
|
33
|
+
|
34
|
+
let(:input_1) {
|
35
|
+
"NA motif_1\n" +
|
36
|
+
"P0 A C G T\n" +
|
37
|
+
"P1 0 1 2 3\n" +
|
38
|
+
"P2 4 5 6 7"
|
39
|
+
}
|
40
|
+
|
41
|
+
let(:input_2) {
|
42
|
+
"NA motif_2\n" +
|
43
|
+
"P0 A C G T\n" +
|
44
|
+
"P1 1 2 3 4\n" +
|
45
|
+
"P2 5 6 7 8\n" +
|
46
|
+
"P3 9 10 11 12"
|
47
|
+
}
|
48
|
+
|
49
|
+
let(:input_3) {
|
50
|
+
"NA motif_3\n" +
|
51
|
+
"P0 A C G T\n" +
|
52
|
+
"P1 2 3 4 5\n" +
|
53
|
+
"P2 6 7 8 9"
|
54
|
+
}
|
55
|
+
|
56
|
+
# this input doesn't have pattern of start motif
|
57
|
+
let(:input_wo_name) {
|
58
|
+
"P0 A C G T\n" +
|
59
|
+
"P1 3 4 5 6\n" +
|
60
|
+
"P2 7 8 9 10"
|
61
|
+
}
|
62
|
+
|
63
|
+
specify do
|
64
|
+
input = "//\n" +
|
65
|
+
input_1 + "\n" +
|
66
|
+
"//\n" +
|
67
|
+
"//\n" +
|
68
|
+
input_2 + "\n" +
|
69
|
+
"//\n" +
|
70
|
+
input_3
|
71
|
+
expect(motif_splitter.split(input)).to eq [input_1, input_2, input_3]
|
72
|
+
end
|
73
|
+
|
74
|
+
specify 'splitter (w/o motif starter) is enough to split motifs' do
|
75
|
+
input = input_1 + "\n" +
|
76
|
+
"//\n" +
|
77
|
+
input_wo_name
|
78
|
+
expect(motif_splitter.split(input)).to eq [input_1, input_wo_name]
|
79
|
+
end
|
80
|
+
|
81
|
+
specify 'motif starter (w/o splitter) is enough to split motifs' do
|
82
|
+
input = input_1 + "\n" +
|
83
|
+
input_2
|
84
|
+
expect(motif_splitter.split(input)).to eq [input_1, input_2]
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|