bioinform 0.1.17 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -3
- data/LICENSE +0 -1
- data/README.md +1 -1
- data/TODO.txt +23 -30
- data/bin/convert_motif +4 -0
- data/bin/pcm2pwm +1 -1
- data/bin/split_motifs +1 -1
- data/bioinform.gemspec +0 -2
- data/lib/bioinform.rb +54 -16
- data/lib/bioinform/alphabet.rb +85 -0
- data/lib/bioinform/background.rb +90 -0
- data/lib/bioinform/cli.rb +1 -2
- data/lib/bioinform/cli/convert_motif.rb +52 -17
- data/lib/bioinform/cli/pcm2pwm.rb +32 -26
- data/lib/bioinform/cli/split_motifs.rb +31 -30
- data/lib/bioinform/conversion_algorithms.rb +6 -0
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
- data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
- data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
- data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
- data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
- data/lib/bioinform/data_models.rb +1 -7
- data/lib/bioinform/data_models/named_model.rb +38 -0
- data/lib/bioinform/data_models/pcm.rb +18 -28
- data/lib/bioinform/data_models/pm.rb +73 -170
- data/lib/bioinform/data_models/ppm.rb +11 -24
- data/lib/bioinform/data_models/pwm.rb +30 -56
- data/lib/bioinform/errors.rb +17 -0
- data/lib/bioinform/formatters.rb +4 -2
- data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
- data/lib/bioinform/formatters/motif_formatter.rb +69 -0
- data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
- data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
- data/lib/bioinform/parsers.rb +1 -8
- data/lib/bioinform/parsers/matrix_parser.rb +44 -36
- data/lib/bioinform/parsers/motif_splitter.rb +45 -0
- data/lib/bioinform/support.rb +46 -14
- data/lib/bioinform/support/strip_doc.rb +1 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/alphabet_spec.rb +79 -0
- data/spec/background_spec.rb +57 -0
- data/spec/cli/cli_spec.rb +6 -6
- data/spec/cli/convert_motif_spec.rb +88 -88
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
- data/spec/cli/pcm2pwm_spec.rb +22 -23
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +6 -21
- data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
- data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
- data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
- data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
- data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
- data/spec/data_models/named_model_spec.rb +41 -0
- data/spec/data_models/pcm_spec.rb +114 -45
- data/spec/data_models/pm_spec.rb +132 -333
- data/spec/data_models/ppm_spec.rb +47 -44
- data/spec/data_models/pwm_spec.rb +85 -77
- data/spec/fabricators/motif_formats_fabricator.rb +116 -116
- data/spec/formatters/consensus_formatter_spec.rb +26 -0
- data/spec/formatters/raw_formatter_spec.rb +169 -0
- data/spec/parsers/matrix_parser_spec.rb +216 -0
- data/spec/parsers/motif_splitter_spec.rb +87 -0
- data/spec/spec_helper.rb +2 -2
- data/spec/spec_helper_source.rb +25 -5
- data/spec/support_spec.rb +31 -0
- metadata +43 -124
- data/bin/merge_into_collection +0 -4
- data/lib/bioinform/cli/merge_into_collection.rb +0 -80
- data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
- data/lib/bioinform/data_models/collection.rb +0 -75
- data/lib/bioinform/data_models/motif.rb +0 -56
- data/lib/bioinform/formatters/raw_formatter.rb +0 -41
- data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
- data/lib/bioinform/parsers/parser.rb +0 -92
- data/lib/bioinform/parsers/splittable_parser.rb +0 -57
- data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
- data/lib/bioinform/parsers/string_parser.rb +0 -72
- data/lib/bioinform/parsers/trivial_parser.rb +0 -34
- data/lib/bioinform/parsers/yaml_parser.rb +0 -35
- data/lib/bioinform/support/advanced_scan.rb +0 -8
- data/lib/bioinform/support/array_product.rb +0 -6
- data/lib/bioinform/support/array_zip.rb +0 -6
- data/lib/bioinform/support/collect_hash.rb +0 -7
- data/lib/bioinform/support/deep_dup.rb +0 -5
- data/lib/bioinform/support/delete_many.rb +0 -14
- data/lib/bioinform/support/inverf.rb +0 -13
- data/lib/bioinform/support/multiline_squish.rb +0 -6
- data/lib/bioinform/support/parameters.rb +0 -28
- data/lib/bioinform/support/partial_sums.rb +0 -16
- data/lib/bioinform/support/same_by.rb +0 -12
- data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
- data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
- data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
- data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
- data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
- data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
- data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
- data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
- data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
- data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
- data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
- data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
- data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
- data/spec/cli/data/split_motifs/collection.yaml +0 -188
- data/spec/cli/merge_into_collection_spec.rb +0 -100
- data/spec/data_models/collection_spec.rb +0 -98
- data/spec/data_models/motif_spec.rb +0 -224
- data/spec/fabricators/collection_fabricator.rb +0 -8
- data/spec/fabricators/motif_fabricator.rb +0 -33
- data/spec/fabricators/pcm_fabricator.rb +0 -25
- data/spec/fabricators/pm_fabricator.rb +0 -52
- data/spec/fabricators/ppm_fabricator.rb +0 -14
- data/spec/fabricators/pwm_fabricator.rb +0 -16
- data/spec/parsers/parser_spec.rb +0 -152
- data/spec/parsers/string_fantom_parser_spec.rb +0 -70
- data/spec/parsers/string_parser_spec.rb +0 -77
- data/spec/parsers/trivial_parser_spec.rb +0 -64
- data/spec/parsers/yaml_parser_spec.rb +0 -50
- data/spec/support/advanced_scan_spec.rb +0 -32
- data/spec/support/array_product_spec.rb +0 -15
- data/spec/support/array_zip_spec.rb +0 -15
- data/spec/support/collect_hash_spec.rb +0 -15
- data/spec/support/delete_many_spec.rb +0 -44
- data/spec/support/inverf_spec.rb +0 -19
- data/spec/support/multiline_squish_spec.rb +0 -25
- data/spec/support/partial_sums_spec.rb +0 -30
- data/spec/support/same_by_spec.rb +0 -36
@@ -1,54 +1,57 @@
|
|
1
|
-
|
2
|
-
require_relative '../../lib/bioinform/data_models/pcm'
|
3
|
-
|
4
|
-
module Bioinform
|
5
|
-
describe PPM do
|
6
|
-
describe '#to_ppm' do
|
7
|
-
let (:ppm_motif) { Fabricate(:ppm) }
|
8
|
-
it 'returns self' do
|
9
|
-
ppm_motif.to_ppm.should eq ppm_motif
|
10
|
-
end
|
11
|
-
end
|
1
|
+
require 'bioinform/data_models/ppm'
|
12
2
|
|
13
|
-
|
14
|
-
let (:ppm_motif) { Fabricate(:ppm_by_pcm) }
|
15
|
-
let (:pcm_motif) { Fabricate(:pcm) }
|
3
|
+
describe Bioinform::MotifModel::PPM do
|
16
4
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
5
|
+
describe '.new' do
|
6
|
+
specify 'fails on matrix having negative elements' do
|
7
|
+
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.6, -0.1, -0.1, 0.6],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error (Bioinform::Error)
|
8
|
+
end
|
9
|
+
specify 'fails on matrix having sum of position elements different from 1' do
|
10
|
+
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.6, 0.1, 0.1, 0.6],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error (Bioinform::Error)
|
11
|
+
expect { Bioinform::MotifModel::PPM.new([[0.4, 0.1, 0.1, 0.4],[0.3, 0.1, 0.1, 0.3],[0.25, 0.25, 0.25, 0.25]]) }.to raise_error (Bioinform::Error)
|
12
|
+
expect { Bioinform::MotifModel::PPM.new([[0.3, 0.1, 0.1, 0.3],[0.3, 0.1, 0.1, 0.3],[0.2, 0.2, 0.2, 0.2]]) }.to raise_error (Bioinform::Error)
|
13
|
+
end
|
14
|
+
|
15
|
+
context 'with valid matrix' do
|
16
|
+
let(:matrix) { [[0.4, 0.1, 0.1, 0.4],[0.3, 0.2, 0.2, 0.3],[0.25, 0.25, 0.25, 0.25]] }
|
17
|
+
specify do
|
18
|
+
expect { Bioinform::MotifModel::PPM.new(matrix) }.not_to raise_error
|
23
19
|
end
|
24
|
-
|
25
|
-
|
26
|
-
ppm.to_pcm.name.should == ppm_motif.name
|
20
|
+
specify do
|
21
|
+
expect( Bioinform::MotifModel::PPM.new(matrix).matrix ).to eq matrix
|
27
22
|
end
|
28
23
|
end
|
24
|
+
end
|
25
|
+
|
26
|
+
describe '.from_string' do
|
27
|
+
specify {
|
28
|
+
expect( Bioinform::MotifModel::PPM.from_string("0.1 0.2 0.3 0.4\n0.4 0.2 0.2 0.2").model.class ).to eq Bioinform::MotifModel::PPM
|
29
|
+
}
|
30
|
+
end
|
29
31
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
let (:ppm_motif_with_log_pseudocount) { ppm_motif.tap{|ppm| ppm.effective_count = 137 } }
|
34
|
-
let (:pcm_motif) { ppm_motif.to_pcm }
|
32
|
+
context 'valid PPM' do
|
33
|
+
let(:ppm) { Bioinform::MotifModel::PPM.new(matrix) }
|
34
|
+
let(:matrix) { [[0.4, 0.1, 0.1, 0.4],[0.3, 0.2, 0.2, 0.3],[0.25, 0.25, 0.25, 0.25]] }
|
35
35
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
36
|
+
specify { expect(ppm).to eq Bioinform::MotifModel::PPM.new(matrix) }
|
37
|
+
specify { expect(ppm).not_to eq matrix }
|
38
|
+
specify { expect(ppm).not_to eq Bioinform::MotifModel::PM.new(matrix) }
|
39
|
+
specify { expect(ppm).not_to eq Bioinform::MotifModel::PWM.new(matrix) }
|
40
|
+
specify { expect(ppm).not_to eq Bioinform::MotifModel::PCM.new(matrix) }
|
41
|
+
specify { expect(matrix).not_to eq ppm }
|
42
|
+
specify { expect(Bioinform::MotifModel::PM.new(matrix)).not_to eq ppm }
|
43
|
+
specify { expect(Bioinform::MotifModel::PWM.new(matrix)).not_to eq ppm }
|
44
|
+
specify { expect(Bioinform::MotifModel::PCM.new(matrix)).not_to eq ppm }
|
45
|
+
|
46
|
+
specify { expect(ppm.named('motif name')).to be_kind_of Bioinform::MotifModel::NamedModel }
|
47
|
+
specify { expect(ppm.named('motif name').model).to eq ppm }
|
48
|
+
specify { expect(ppm.named('motif name').name).to eq 'motif name' }
|
49
|
+
|
50
|
+
describe '#reversed, #complemented, #reverse_complemented' do
|
51
|
+
specify { expect(ppm.reversed).to be_kind_of Bioinform::MotifModel::PPM }
|
52
|
+
specify { expect(ppm.complemented).to be_kind_of Bioinform::MotifModel::PPM }
|
53
|
+
specify { expect(ppm.reverse_complemented).to be_kind_of Bioinform::MotifModel::PPM }
|
54
|
+
specify { expect(ppm.revcomp).to be_kind_of Bioinform::MotifModel::PPM }
|
52
55
|
end
|
53
56
|
end
|
54
57
|
end
|
@@ -1,96 +1,104 @@
|
|
1
|
-
|
2
|
-
require_relative '../../lib/bioinform/data_models/pwm'
|
1
|
+
require 'bioinform/data_models/pwm'
|
3
2
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
3
|
+
describe Bioinform::MotifModel::PWM do
|
4
|
+
|
5
|
+
describe '.new' do
|
6
|
+
context 'with valid matrix' do
|
7
|
+
let(:matrix) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
|
8
|
+
|
9
|
+
specify do
|
10
|
+
expect { Bioinform::MotifModel::PWM.new(matrix) }.not_to raise_error
|
10
11
|
end
|
11
|
-
|
12
|
-
|
13
|
-
pwm.score_mean.should == ((0.2*1+0.3*2+0.3*1+0.2*2) + (0.2*4+0.3*6+0.3*8+0.2*6) + (0.2*2+0.3*2+0.3*2+0.2*2)) / (0.2+0.3+0.3+0.2)
|
12
|
+
specify do
|
13
|
+
expect( Bioinform::MotifModel::PWM.new(matrix).matrix ).to eq matrix
|
14
14
|
end
|
15
15
|
end
|
16
|
+
end
|
16
17
|
|
17
|
-
|
18
|
-
|
18
|
+
context 'valid PWM' do
|
19
|
+
let(:matrix) { [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]] }
|
20
|
+
let(:pwm) { Bioinform::MotifModel::PWM.new(matrix) }
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
+
specify { expect(pwm).to eq Bioinform::MotifModel::PWM.new(matrix) }
|
23
|
+
specify { expect(pwm).not_to eq matrix }
|
24
|
+
specify { expect(pwm).not_to eq Bioinform::MotifModel::PM.new(matrix) }
|
25
|
+
# specify { expect(pwm).not_to eq Bioinform::MotifModel::PCM.new(matrix) }
|
26
|
+
# specify { expect(pwm).not_to eq Bioinform::MotifModel::PPM.new(matrix) }
|
27
|
+
specify { expect(matrix).not_to eq pwm }
|
28
|
+
specify { expect(Bioinform::MotifModel::PM.new(matrix)).not_to eq pwm }
|
29
|
+
# specify { expect(Bioinform::MotifModel::PCM.new(matrix)).not_to eq pwm }
|
30
|
+
# specify { expect(Bioinform::MotifModel::PPM.new(matrix)).not_to eq pwm }
|
31
|
+
|
32
|
+
specify { expect(pwm.named('motif name')).to be_kind_of Bioinform::MotifModel::NamedModel }
|
33
|
+
specify { expect(pwm.named('motif name').model).to eq pwm }
|
34
|
+
specify { expect(pwm.named('motif name').name).to eq 'motif name' }
|
22
35
|
|
23
36
|
describe '#score' do
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
pwm.score('agata').should == 13141
|
30
|
-
pwm.score('CCGCT').should == 22324
|
31
|
-
end
|
32
|
-
it 'should give score average score(considering probabilities) for a position for a N-letter' do
|
33
|
-
pwm.score('AANAA').should == (11011 + 250)
|
34
|
-
pwm.set_parameters(background: [0.1,0.4,0.1,0.4]).score('AANAA').should == (11011 + 0.1*100 + 0.4*200 + 0.1*300 + 0.4*400)
|
35
|
-
end
|
36
|
-
it 'should raise an ArgumentError if word contain bad letter' do
|
37
|
-
expect{ pwm.score('AAAAV') }.to raise_error(ArgumentError)
|
38
|
-
end
|
39
|
-
it 'should raise an ArgumentError if word has size different than size of matrix' do
|
40
|
-
expect{ pwm.score('AAA') }.to raise_error(ArgumentError)
|
41
|
-
end
|
37
|
+
specify { expect( pwm.score('acT') ).to eq(1 + (-11) + 0.321) }
|
38
|
+
specify { expect{ pwm.score('acTt') }.to raise_error(Bioinform::Error) }
|
39
|
+
specify { expect{ pwm.score('ac') }.to raise_error(Bioinform::Error) }
|
40
|
+
specify { expect{ pwm.score('acW') }.to raise_error(Bioinform::Error) }
|
41
|
+
specify { expect{ pwm.score('acN') }.to raise_error(Bioinform::Error) }
|
42
42
|
end
|
43
43
|
|
44
|
-
describe '#
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
44
|
+
describe '#reversed, #complemented, #reverse_complemented' do
|
45
|
+
specify { expect(pwm.reversed).to be_kind_of Bioinform::MotifModel::PWM }
|
46
|
+
specify { expect(pwm.complemented).to be_kind_of Bioinform::MotifModel::PWM }
|
47
|
+
specify { expect(pwm.reverse_complemented).to be_kind_of Bioinform::MotifModel::PWM }
|
48
|
+
specify { expect(pwm.revcomp).to be_kind_of Bioinform::MotifModel::PWM }
|
49
49
|
end
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
50
|
+
|
51
|
+
describe '#discreted' do
|
52
|
+
specify { expect(pwm.discreted(1)).to eq Bioinform::MotifModel::PWM.new( [[1,2,3,2],[12,-11,12,0],[-1, 1, 1, 1]]) }
|
53
|
+
specify { expect(pwm.discreted(1, rounding_method: :round)).to eq Bioinform::MotifModel::PWM.new( [[1,2,3,2],[12,-11,12,0],[-1, 1, 0, 0]]) }
|
54
|
+
specify { expect(pwm.discreted(10)).to eq Bioinform::MotifModel::PWM.new( [[10,20,30,16],[120,-110,120,0],[-11, 6, 4, 4]]) }
|
55
|
+
specify { expect(pwm.discreted(10, rounding_method: :round)).to eq Bioinform::MotifModel::PWM.new( [[10,20,30,16],[120,-110,120,0],[-11, 6, 4, 3]]) }
|
56
|
+
specify { expect(pwm.discreted(10, rounding_method: :floor)).to eq Bioinform::MotifModel::PWM.new( [[10,20,30,15],[120,-110,120,0],[-11, 6, 4, 3]]) }
|
55
57
|
end
|
56
58
|
|
57
|
-
describe '#
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
@pwm.best_suffix(1).should == (7.13 - 1.0)
|
62
|
-
@pwm.best_suffix(2).should == (-1.0)
|
63
|
-
@pwm.best_suffix(3).should == (0.0)
|
64
|
-
end
|
65
|
-
it 'should give right results after left(right)_augment, discrete, reverse_complement etc' do
|
66
|
-
pwm = PWM.new([[1, 2, 3, 4], [10,10.5,11,11.5]])
|
67
|
-
pwm.best_suffix(1).should == 11.5
|
68
|
-
pwm.left_augment!(1)
|
69
|
-
pwm.best_suffix(1).should == 15.5
|
70
|
-
end
|
59
|
+
describe '#left_augmented' do
|
60
|
+
specify { expect{pwm.left_augmented(-1)}.to raise_error Bioinform::Error }
|
61
|
+
specify { expect(pwm.left_augmented(0)).to eq pwm }
|
62
|
+
specify { expect(pwm.left_augmented(2)).to eq Bioinform::MotifModel::PWM.new( [[0,0,0,0],[0,0,0,0],[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321]]) }
|
71
63
|
end
|
72
|
-
describe '#
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
@pwm.worst_suffix(1).should == (3.25 - 1.5)
|
77
|
-
@pwm.worst_suffix(2).should == (- 1.5)
|
78
|
-
@pwm.worst_suffix(3).should == (0.0)
|
79
|
-
end
|
64
|
+
describe '#right_augmented' do
|
65
|
+
specify { expect{pwm.right_augmented(-1)}.to raise_error Bioinform::Error }
|
66
|
+
specify { expect(pwm.right_augmented(0)).to eq pwm }
|
67
|
+
specify { expect(pwm.right_augmented(2)).to eq Bioinform::MotifModel::PWM.new( [[1,2,3,1.567],[12,-11,12,0],[-1.1, 0.6, 0.4, 0.321],[0,0,0,0],[0,0,0,0]]) }
|
80
68
|
end
|
69
|
+
end
|
81
70
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
71
|
+
context 'with different alphabet' do
|
72
|
+
# A C G T M R W S Y K B D H V N
|
73
|
+
let(:matrix) { [[1, 2, 3, 1.567, 10,20,30,40,50,60, 700,800,900,1000, 10000],
|
74
|
+
[12, -11, 12, 0, 11,21,31,41,51,61, 701,801,901,1001, 10001 ],
|
75
|
+
[-1.1, 0.6, 0.4, 0.321, 12,22,32,42,52,62, 702,802,902,1002, 10002 ]] }
|
76
|
+
let(:pwm) { Bioinform::MotifModel::PWM.new(matrix, alphabet: Bioinform::IUPACAlphabet) }
|
77
|
+
describe '#score' do
|
78
|
+
specify { expect( pwm.score('acT') ).to eq(1 + (-11) + 0.321) }
|
79
|
+
specify { expect{ pwm.score('acTt') }.to raise_error(Bioinform::Error) }
|
80
|
+
specify { expect{ pwm.score('ac') }.to raise_error(Bioinform::Error) }
|
81
|
+
specify { expect( pwm.score('acW') ).to eq(1 + (-11) + 32) }
|
82
|
+
specify { expect( pwm.score('acN') ).to eq(1 + (-11) + 10002) }
|
94
83
|
end
|
84
|
+
specify { expect(pwm.left_augmented(1)).to be_kind_of Bioinform::MotifModel::PWM }
|
85
|
+
specify { expect(pwm.left_augmented(1).alphabet).to eq Bioinform::IUPACAlphabet }
|
86
|
+
specify { expect(pwm.left_augmented(1).matrix).to eq [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
87
|
+
[1, 2, 3, 1.567, 10,20,30,40,50,60, 700,800,900,1000, 10000],
|
88
|
+
[12, -11, 12, 0, 11,21,31,41,51,61, 701,801,901,1001, 10001 ],
|
89
|
+
[-1.1, 0.6, 0.4, 0.321, 12,22,32,42,52,62, 702,802,902,1002, 10002 ]] }
|
90
|
+
|
91
|
+
specify { expect(pwm.discreted(1).matrix).to eq [[1, 2, 3, 2, 10,20,30,40,50,60, 700,800,900,1000, 10000],
|
92
|
+
[12,-11,12,0, 11,21,31,41,51,61, 701,801,901,1001, 10001 ],
|
93
|
+
[-1, 1, 1, 1, 12,22,32,42,52,62, 702,802,902,1002, 10002 ]] }
|
94
|
+
specify { expect(pwm.discreted(1).alphabet).to eq Bioinform::IUPACAlphabet}
|
95
|
+
|
96
|
+
specify { expect{ pwm.to_IUPAC_PWM }.to raise_error }
|
97
|
+
end
|
98
|
+
|
99
|
+
describe '.from_string' do
|
100
|
+
specify {
|
101
|
+
expect( Bioinform::MotifModel::PWM.from_string("1 2 3 4\n5 6 7 8").model.class ).to eq Bioinform::MotifModel::PWM
|
102
|
+
}
|
95
103
|
end
|
96
104
|
end
|
@@ -1,125 +1,125 @@
|
|
1
|
-
require_relative '../../lib/bioinform/support/strip_doc'
|
2
|
-
require 'ostruct'
|
1
|
+
# require_relative '../../lib/bioinform/support/strip_doc'
|
2
|
+
# require 'ostruct'
|
3
3
|
|
4
|
-
Fabricator(:SP1_f1_plain_text, class_name: OpenStruct) do
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
4
|
+
# Fabricator(:SP1_f1_plain_text, class_name: OpenStruct) do
|
5
|
+
# name 'SP1_f1'
|
6
|
+
# pcm (<<-EOS).strip_doc
|
7
|
+
# SP1_f1
|
8
|
+
# 682.6436366358055 443.1455214015781 2075.655346294993 287.211468117951
|
9
|
+
# 299.8883246804867 103.74338315843572 2613.8927022405364 471.1315623708902
|
10
|
+
# 591.4892493324709 42.631827541794564 2845.1654083148564 9.36948726124641
|
11
|
+
# 7.071084742361592 45.29093411231232 3432.8847704374107 3.409183158303573
|
12
|
+
# 91.308984085713 19.1536481364332 3373.656949880137 4.5363903481026
|
13
|
+
# 809.2082973387932 2246.941954176211 61.30766021687515 371.19806071846244
|
14
|
+
# 120.56476435866055 42.4349244403591 3242.1560628684038 83.50022078295852
|
15
|
+
# 13.72524477409959 35.858220519297525 3332.4066864946167 106.66582066236779
|
16
|
+
# 558.1188080161639 90.0084504200356 2694.854973210736 145.67374080342415
|
17
|
+
# 264.0088462230318 254.7175868081866 2796.88087480315 173.0486646159857
|
18
|
+
# 519.46013914282 1874.9349086474765 654.5411208373813 439.7198038226514
|
19
|
+
# EOS
|
20
|
+
# pwm (<<-EOS).strip_doc
|
21
|
+
# SP1_f1
|
22
|
+
# -0.24435707885585292 -0.674823404693731 0.8657012535789866 -1.1060188862599287
|
23
|
+
# -1.0631255752097797 -2.111925969423868 1.0960627561110403 -0.6138563775211977
|
24
|
+
# -0.3872276234760535 -2.9739851913218045 1.1807800242010378 -4.338927525031566
|
25
|
+
# -4.563896055436894 -2.9161633002532277 1.3684371349982638 -5.077972423609655
|
26
|
+
# -2.2369752892820083 -3.7196436313301846 1.3510439136452734 -4.889930670508233
|
27
|
+
# -0.07473964149330865 0.944919654762011 -2.6246857648086044 -0.8510983487822436
|
28
|
+
# -1.9643526491643322 -2.978402770880115 1.3113096718240573 -2.324334259499025
|
29
|
+
# -4.0155484139655835 -3.1384268078096667 1.3387488589788057 -2.084673903537648
|
30
|
+
# -0.44509385828355363 -2.2510053061629702 1.1265431574368685 -1.7780413702431372
|
31
|
+
# -1.1896356092245048 -1.2251832285630027 1.1636760063747527 -1.6080243648157353
|
32
|
+
# -0.5166047365590571 0.7641033353626657 -0.2862677570028208 -0.68254820978656
|
33
|
+
# EOS
|
34
34
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
35
|
+
# pwm_by_ppm (<<-EOS).strip_doc
|
36
|
+
# SP1_f1
|
37
|
+
# -0.24500451019749314 -0.6770792648706158 0.8670547406179426 -1.1107587045732945
|
38
|
+
# -1.0675673174344313 -2.129057091432141 1.097618374273881 -0.6158400910824666
|
39
|
+
# -0.3883339637317867 -3.018376372012639 1.1823990131526274 -4.533519098288315
|
40
|
+
# -4.814963574678032 -2.957870589233678 1.3701787559041199 -5.544504752142028
|
41
|
+
# -2.256728287148141 -3.818484271031738 1.3527751116079707 -5.258845853421905
|
42
|
+
# -0.07492111272663467 0.9463479675413448 -2.6550726738330934 -0.8542416951821531
|
43
|
+
# -1.9787903996167093 -3.023005758900076 1.313016367251334 -2.3461281957623594
|
44
|
+
# -4.151740650307917 -3.191404627775074 1.340472578885988 -2.1012766959190534
|
45
|
+
# -0.44640561441369214 -2.271073912244469 1.1281221968922746 -1.789608002606767
|
46
|
+
# -1.1949948607174645 -1.2308220430118417 1.1652826302263302 -1.6174046184238486
|
47
|
+
# -0.5181873934002441 0.7653517506909445 -0.2870430604819932 -0.6848397571539165
|
48
|
+
# EOS
|
49
49
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
end
|
50
|
+
# ppm (<<-EOS).strip_doc
|
51
|
+
# SP1_f1
|
52
|
+
# 0.1956752520244457 0.12702471235371654 0.5949727811186595 0.08232725450317827
|
53
|
+
# 0.0859609910087678 0.029737349849824683 0.7492549345313109 0.13504672461010295
|
54
|
+
# 0.1695464539935781 0.012220129436222756 0.8155477154477051 0.002685701122505801
|
55
|
+
# 0.0020268793478638923 0.012982344624970667 0.9840135563800679 0.0009772196471150077
|
56
|
+
# 0.0261731121689194 0.005490265674714912 0.9670362960755289 0.0013003260808535316
|
57
|
+
# 0.2319541690923538 0.6440709464963454 0.017573432491199324 0.10640145192010551
|
58
|
+
# 0.03455908674020943 0.012163688473573988 0.9293424426115627 0.02393478217466954
|
59
|
+
# 0.003934250004152571 0.010278520095551809 0.9552121828034635 0.030575047096847708
|
60
|
+
# 0.15998103923791546 0.025800322855227358 0.7724622302949381 0.04175640761192835
|
61
|
+
# 0.07567637746682139 0.07301309983548783 0.8017072754923166 0.04960324720538194
|
62
|
+
# 0.14889978927270572 0.537437604468227 0.1876198530340185 0.1260427532250494
|
63
|
+
# EOS
|
64
|
+
# end
|
65
65
|
|
66
66
|
|
67
|
-
Fabricator(:KLF4_f2_plain_text, class_name: OpenStruct) do
|
68
|
-
|
67
|
+
# Fabricator(:KLF4_f2_plain_text, class_name: OpenStruct) do
|
68
|
+
# name 'KLF4_f2'
|
69
69
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
70
|
+
# pcm (<<-EOS).strip_doc
|
71
|
+
# KLF4_f2
|
72
|
+
# 1233.46088405354 93.18173277811673 1036.6014857092885 1258.2948629970272
|
73
|
+
# 263.979242343185 5.314520555872139 3347.5949971525274 4.650205486388122
|
74
|
+
# 76.7700780003465 6.643150694840173 3529.4896409394937 8.636095903292224
|
75
|
+
# 57.86097393406657 18.102585643439472 3520.3342027139347 25.24120324653207
|
76
|
+
# 518.1947904009378 1545.9062946905135 22.396758181071043 1535.0411222654507
|
77
|
+
# 137.98151691820345 9.300410972776241 3456.320530770924 17.936506876068467
|
78
|
+
# 115.27647661640499 81.51802997128804 1861.9425868567278 1562.801872093553
|
79
|
+
# 227.8095486111286 42.84555258785854 3278.6396005325996 72.244263806387
|
80
|
+
# 108.73384179997886 134.47328134862394 3162.880454846513 215.45138754285665
|
81
|
+
# 238.49636899561344 2225.9561104691043 402.40727964384774 754.6792064294074
|
82
|
+
# EOS
|
83
83
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
84
|
+
# pwm (<<-EOS).strip_doc
|
85
|
+
# KLF4_f2
|
86
|
+
# 0.30861857265872605 -2.254321000121579 0.13505703522674192 0.3285194224375633
|
87
|
+
# -1.227018967707036 -4.814127713368663 1.3059890687390967 -4.908681463544344
|
88
|
+
# -2.443469374521196 -4.648238485031404 1.3588686548279805 -4.441801801188402
|
89
|
+
# -2.7177827948276123 -3.8073538975356565 1.356272809724262 -3.504104725510225
|
90
|
+
# -0.5563232977367343 0.5340697765121405 -3.61417723090579 0.5270259776377405
|
91
|
+
# -1.8687622060887386 -4.381483976582316 1.337932245336098 -3.815629658877517
|
92
|
+
# -2.045671123823928 -2.384975142213679 0.7198551207724355 0.5449254135616948
|
93
|
+
# -1.373157530374372 -3.0063112097748217 1.285188335493552 -2.5026044231773543
|
94
|
+
# -2.1030513122772208 -1.8941348100402244 1.249265758393991 -1.4284210948906104
|
95
|
+
# -1.3277128628152939 0.8982415633049462 -0.8080773665408135 -0.18161647647456935
|
96
|
+
# EOS
|
97
97
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
98
|
+
# pwm_by_ppm (<<-EOS).strip_doc
|
99
|
+
# KLF4_f2
|
100
|
+
# 0.3092192421596327 -2.2738082797138253 0.13534285704681936 0.32915281813495917
|
101
|
+
# -1.2324895093929382 -5.137917180091503 1.307637473739625 -5.271448572716026
|
102
|
+
# -2.4675450270943275 -4.914773628777294 1.3605485798774357 -4.652409364309802
|
103
|
+
# -2.7503168499566213 -3.9123052006620864 1.3579512260780577 -3.5798822732152016
|
104
|
+
# -0.5580087671420197 0.5350056338953667 -3.699443757631887 0.5279524674519223
|
105
|
+
# -1.8812402411482916 -4.578301392156081 1.3395998898354127 -3.9215218557670104
|
106
|
+
# -2.0610265944380752 -2.4075357595386326 0.7210156412752609 0.5458755791300577
|
107
|
+
# -1.3798500151830562 -3.0507581325511492 1.2868238774479581 -2.5283070518025443
|
108
|
+
# -2.1194569041801143 -1.906994454335074 1.2508784457678146 -1.435624677710707
|
109
|
+
# -1.3340058960845615 0.899581832910703 -0.8108952727718963 -0.18206721514971644
|
110
|
+
# EOS
|
111
111
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
end
|
112
|
+
# ppm (<<-EOS).strip_doc
|
113
|
+
# KLF4_f2
|
114
|
+
# 0.3405902561841722 0.025729871655343288 0.28623231603288946 0.34744755612759504
|
115
|
+
# 0.07289145439416013 0.001467475735162407 0.9243570286024104 0.0012840412682671063
|
116
|
+
# 0.02119819190981491 0.0018343446689530088 0.9745828153515933 0.002384648069638911
|
117
|
+
# 0.015976902218825483 0.004998589222896949 0.9720547635170884 0.006969745041189289
|
118
|
+
# 0.14308690182046985 0.4268644654665126 0.006184320642189763 0.423864312070828
|
119
|
+
# 0.038100243634326486 0.0025680825365342118 0.9543789432229661 0.004952730606173124
|
120
|
+
# 0.03183079837421573 0.02250922349503941 0.5141302094426423 0.431529768688103
|
121
|
+
# 0.06290407221320286 0.011830758413914766 0.9053166710980188 0.019948498274864
|
122
|
+
# 0.030024208723052264 0.03713152961441303 0.8733525953866614 0.05949166627587335
|
123
|
+
# 0.06585497802594685 0.6146437002752069 0.11111499378388463 0.20838632791496178
|
124
|
+
# EOS
|
125
|
+
# end
|