bioinform 0.1.12 → 0.1.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +17 -17
- data/Gemfile +16 -16
- data/LICENSE +21 -21
- data/README.md +35 -35
- data/Rakefile +4 -4
- data/TODO.txt +37 -37
- data/bin/merge_into_collection +3 -3
- data/bin/pcm2pwm +3 -3
- data/bin/split_motifs +3 -3
- data/bioinform.gemspec +19 -19
- data/lib/bioinform/cli/convert_motif.rb +107 -107
- data/lib/bioinform/cli/merge_into_collection.rb +79 -79
- data/lib/bioinform/cli/pcm2pwm.rb +46 -46
- data/lib/bioinform/cli/split_motifs.rb +46 -46
- data/lib/bioinform/cli.rb +29 -29
- data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +18 -18
- data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +19 -19
- data/lib/bioinform/data_models/collection.rb +74 -74
- data/lib/bioinform/data_models/motif.rb +55 -55
- data/lib/bioinform/data_models/pcm.rb +23 -23
- data/lib/bioinform/data_models/pm.rb +169 -169
- data/lib/bioinform/data_models/ppm.rb +9 -9
- data/lib/bioinform/data_models/pwm.rb +55 -55
- data/lib/bioinform/data_models.rb +10 -10
- data/lib/bioinform/formatters/raw_formatter.rb +40 -40
- data/lib/bioinform/formatters/transfac_formatter.rb +38 -38
- data/lib/bioinform/formatters.rb +1 -1
- data/lib/bioinform/parsers/jaspar_parser.rb +34 -34
- data/lib/bioinform/parsers/parser.rb +87 -87
- data/lib/bioinform/parsers/splittable_parser.rb +56 -56
- data/lib/bioinform/parsers/string_fantom_parser.rb +34 -34
- data/lib/bioinform/parsers/string_parser.rb +71 -71
- data/lib/bioinform/parsers/trivial_parser.rb +33 -33
- data/lib/bioinform/parsers/yaml_parser.rb +34 -34
- data/lib/bioinform/parsers.rb +6 -6
- data/lib/bioinform/support/array_product.rb +5 -5
- data/lib/bioinform/support/array_zip.rb +5 -5
- data/lib/bioinform/support/collect_hash.rb +6 -6
- data/lib/bioinform/support/deep_dup.rb +4 -4
- data/lib/bioinform/support/delete_many.rb +13 -13
- data/lib/bioinform/support/inverf.rb +12 -12
- data/lib/bioinform/support/multiline_squish.rb +5 -5
- data/lib/bioinform/support/parameters.rb +27 -27
- data/lib/bioinform/support/partial_sums.rb +15 -15
- data/lib/bioinform/support/same_by.rb +12 -12
- data/lib/bioinform/support/strip_doc.rb +8 -8
- data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +3 -0
- data/lib/bioinform/support.rb +17 -17
- data/lib/bioinform/version.rb +3 -3
- data/lib/bioinform.rb +10 -10
- data/spec/cli/cli_spec.rb +13 -13
- data/spec/cli/convert_motif_spec.rb +106 -106
- data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -14
- data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +11 -11
- data/spec/cli/data/merge_into_collection/SP1_f1.pwm +12 -12
- data/spec/cli/data/merge_into_collection/collection.txt.result +40 -40
- data/spec/cli/data/merge_into_collection/collection.yaml.result +188 -188
- data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +188 -188
- data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -14
- data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -11
- data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -12
- data/spec/cli/data/pcm2pwm/KLF4 f2 spaced name.pcm +11 -11
- data/spec/cli/data/pcm2pwm/KLF4_f2.pcm +11 -11
- data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -11
- data/spec/cli/data/pcm2pwm/SP1_f1.pcm +12 -12
- data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -12
- data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -14
- data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -11
- data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -12
- data/spec/cli/data/split_motifs/collection.yaml +188 -188
- data/spec/cli/data/split_motifs/plain_collection.txt +38 -38
- data/spec/cli/merge_into_collection_spec.rb +99 -99
- data/spec/cli/pcm2pwm_spec.rb +79 -79
- data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +17 -17
- data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +14 -14
- data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +49 -49
- data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +4 -4
- data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
- data/spec/cli/split_motifs_spec.rb +76 -76
- data/spec/data_models/collection_spec.rb +97 -97
- data/spec/data_models/motif_spec.rb +223 -223
- data/spec/data_models/pcm_spec.rb +55 -55
- data/spec/data_models/pm_spec.rb +359 -359
- data/spec/data_models/ppm_spec.rb +7 -7
- data/spec/data_models/pwm_spec.rb +82 -82
- data/spec/fabricators/collection_fabricator.rb +7 -7
- data/spec/fabricators/motif_fabricator.rb +32 -32
- data/spec/fabricators/motif_formats_fabricator.rb +124 -124
- data/spec/fabricators/pcm_fabricator.rb +24 -24
- data/spec/fabricators/pm_fabricator.rb +51 -51
- data/spec/fabricators/ppm_fabricator.rb +13 -13
- data/spec/fabricators/pwm_fabricator.rb +16 -16
- data/spec/parsers/parser_spec.rb +152 -152
- data/spec/parsers/string_fantom_parser_spec.rb +69 -69
- data/spec/parsers/string_parser_spec.rb +76 -76
- data/spec/parsers/trivial_parser_spec.rb +63 -63
- data/spec/parsers/yaml_parser_spec.rb +50 -50
- data/spec/spec_helper.rb +10 -10
- data/spec/spec_helper_source.rb +59 -59
- data/spec/support/advanced_scan_spec.rb +31 -31
- data/spec/support/array_product_spec.rb +14 -14
- data/spec/support/array_zip_spec.rb +14 -14
- data/spec/support/collect_hash_spec.rb +14 -14
- data/spec/support/delete_many_spec.rb +43 -43
- data/spec/support/inverf_spec.rb +18 -18
- data/spec/support/multiline_squish_spec.rb +24 -24
- data/spec/support/partial_sums_spec.rb +30 -30
- data/spec/support/same_by_spec.rb +35 -35
- metadata +3 -3
@@ -1,52 +1,52 @@
|
|
1
|
-
Fabricator(:pm, class_name: Bioinform::PM) do
|
2
|
-
initialize_with{ Bioinform::PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'PM_name') }
|
3
|
-
end
|
4
|
-
|
5
|
-
Fabricator(:pm_unnamed, from: :pm) do
|
6
|
-
name nil
|
7
|
-
end
|
8
|
-
|
9
|
-
|
10
|
-
Fabricator(:pm_first, from: :pm) do
|
11
|
-
name 'PM_first'
|
12
|
-
end
|
13
|
-
|
14
|
-
Fabricator(:pm_second, from: :pm) do
|
15
|
-
matrix [[15,16,17,18],[11,21,31,41]]
|
16
|
-
name 'PM_second'
|
17
|
-
end
|
18
|
-
|
19
|
-
|
20
|
-
Fabricator(:pm_4x4, from: :pm) do
|
21
|
-
matrix [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]]
|
22
|
-
end
|
23
|
-
|
24
|
-
Fabricator(:pm_4x4_unnamed, from: :pm_4x4) do
|
25
|
-
name nil
|
26
|
-
end
|
27
|
-
|
28
|
-
Fabricator(:pm_with_floats, from: :pm_unnamed) do
|
29
|
-
matrix [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]
|
30
|
-
end
|
31
|
-
|
32
|
-
Fabricator(:pm_1, from: :pm) do
|
33
|
-
matrix [[0,1,2,3],[4,5,6,7]]
|
34
|
-
name 'motif_1'
|
35
|
-
end
|
36
|
-
Fabricator(:pm_2, from: :pm) do
|
37
|
-
matrix [[1,2,3,4],[5,6,7,8],[9,10,11,12]]
|
38
|
-
name 'motif_2'
|
39
|
-
end
|
40
|
-
Fabricator(:pm_3, from: :pm) do
|
41
|
-
matrix [[2,3,4,5],[6,7,8,9]]
|
42
|
-
name 'motif_3'
|
43
|
-
end
|
44
|
-
|
45
|
-
Fabricator(:pm_4,from: :pm) do
|
46
|
-
matrix [[1,0,1,0],[0,0,0,0],[1,2,3,4]]
|
47
|
-
name 'pm 4'
|
48
|
-
end
|
49
|
-
Fabricator(:pm_5, from: :pm) do
|
50
|
-
matrix [[1,2,1,2],[0,3,6,9],[1,2,3,4]]
|
51
|
-
name 'pm 5'
|
1
|
+
Fabricator(:pm, class_name: Bioinform::PM) do
|
2
|
+
initialize_with{ Bioinform::PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'PM_name') }
|
3
|
+
end
|
4
|
+
|
5
|
+
Fabricator(:pm_unnamed, from: :pm) do
|
6
|
+
name nil
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
Fabricator(:pm_first, from: :pm) do
|
11
|
+
name 'PM_first'
|
12
|
+
end
|
13
|
+
|
14
|
+
Fabricator(:pm_second, from: :pm) do
|
15
|
+
matrix [[15,16,17,18],[11,21,31,41]]
|
16
|
+
name 'PM_second'
|
17
|
+
end
|
18
|
+
|
19
|
+
|
20
|
+
Fabricator(:pm_4x4, from: :pm) do
|
21
|
+
matrix [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]]
|
22
|
+
end
|
23
|
+
|
24
|
+
Fabricator(:pm_4x4_unnamed, from: :pm_4x4) do
|
25
|
+
name nil
|
26
|
+
end
|
27
|
+
|
28
|
+
Fabricator(:pm_with_floats, from: :pm_unnamed) do
|
29
|
+
matrix [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]
|
30
|
+
end
|
31
|
+
|
32
|
+
Fabricator(:pm_1, from: :pm) do
|
33
|
+
matrix [[0,1,2,3],[4,5,6,7]]
|
34
|
+
name 'motif_1'
|
35
|
+
end
|
36
|
+
Fabricator(:pm_2, from: :pm) do
|
37
|
+
matrix [[1,2,3,4],[5,6,7,8],[9,10,11,12]]
|
38
|
+
name 'motif_2'
|
39
|
+
end
|
40
|
+
Fabricator(:pm_3, from: :pm) do
|
41
|
+
matrix [[2,3,4,5],[6,7,8,9]]
|
42
|
+
name 'motif_3'
|
43
|
+
end
|
44
|
+
|
45
|
+
Fabricator(:pm_4,from: :pm) do
|
46
|
+
matrix [[1,0,1,0],[0,0,0,0],[1,2,3,4]]
|
47
|
+
name 'pm 4'
|
48
|
+
end
|
49
|
+
Fabricator(:pm_5, from: :pm) do
|
50
|
+
matrix [[1,2,1,2],[0,3,6,9],[1,2,3,4]]
|
51
|
+
name 'pm 5'
|
52
52
|
end
|
@@ -1,14 +1,14 @@
|
|
1
|
-
Fabricator(:ppm, class_name: Bioinform::PPM) do
|
2
|
-
initialize_with{ Bioinform::PPM.new(matrix: [[0.2, 0.3, 0.3, 0.2],[0.7, 0.2, 0.0, 0.1]]) }
|
3
|
-
name 'PPM_name'
|
4
|
-
end
|
5
|
-
|
6
|
-
# It has the same name as original pcm because PCM#to_ppm doesn't change the name
|
7
|
-
Fabricator(:ppm_by_pcm, class_name: Bioinform::PPM) do
|
8
|
-
initialize_with{ Fabricate(:pcm).to_ppm }
|
9
|
-
end
|
10
|
-
|
11
|
-
Fabricator(:ppm_pcm_divided_by_count, from: :ppm) do
|
12
|
-
# this matrix should be initialized manually - it's used for spec checking PCM#to_ppm
|
13
|
-
matrix [[1.0/7.0, 2.0/7.0, 3.0/7.0, 1.0/7.0], [4.0/7.0, 0.0/7.0, 1.0/7.0, 2.0/7.0]]
|
1
|
+
Fabricator(:ppm, class_name: Bioinform::PPM) do
|
2
|
+
initialize_with{ Bioinform::PPM.new(matrix: [[0.2, 0.3, 0.3, 0.2],[0.7, 0.2, 0.0, 0.1]]) }
|
3
|
+
name 'PPM_name'
|
4
|
+
end
|
5
|
+
|
6
|
+
# It has the same name as original pcm because PCM#to_ppm doesn't change the name
|
7
|
+
Fabricator(:ppm_by_pcm, class_name: Bioinform::PPM) do
|
8
|
+
initialize_with{ Fabricate(:pcm).to_ppm }
|
9
|
+
end
|
10
|
+
|
11
|
+
Fabricator(:ppm_pcm_divided_by_count, from: :ppm) do
|
12
|
+
# this matrix should be initialized manually - it's used for spec checking PCM#to_ppm
|
13
|
+
matrix [[1.0/7.0, 2.0/7.0, 3.0/7.0, 1.0/7.0], [4.0/7.0, 0.0/7.0, 1.0/7.0, 2.0/7.0]]
|
14
14
|
end
|
@@ -1,16 +1,16 @@
|
|
1
|
-
Fabricator(:pwm, class_name: Bioinform::PWM) do
|
2
|
-
initialize_with{ Bioinform::PWM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'PWM_name')}
|
3
|
-
end
|
4
|
-
|
5
|
-
# It has name 'PCM_name' because name isn't converted during #to_pwm
|
6
|
-
Fabricator(:pwm_by_pcm, class_name: Bioinform::PWM) do
|
7
|
-
initialize_with{ Fabricate(:pcm).to_pwm }
|
8
|
-
end
|
9
|
-
|
10
|
-
Fabricator(:rounded_upto_3_digits_pwm_by_pcm_with_pseudocount_1, from: :pwm) do
|
11
|
-
matrix [[-0.47, 0.118, 0.486, -0.47],[0.754, -2.079, -0.47, 0.118]]
|
12
|
-
end
|
13
|
-
|
14
|
-
Fabricator(:rounded_upto_3_digits_pwm_by_pcm_with_pseudocount_10, from: :pwm) do
|
15
|
-
matrix [[-0.194, 0.057, 0.258, -0.194],[0.425, -0.531, -0.194, 0.057]]
|
16
|
-
end
|
1
|
+
Fabricator(:pwm, class_name: Bioinform::PWM) do
|
2
|
+
initialize_with{ Bioinform::PWM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'PWM_name')}
|
3
|
+
end
|
4
|
+
|
5
|
+
# It has name 'PCM_name' because name isn't converted during #to_pwm
|
6
|
+
Fabricator(:pwm_by_pcm, class_name: Bioinform::PWM) do
|
7
|
+
initialize_with{ Fabricate(:pcm).to_pwm }
|
8
|
+
end
|
9
|
+
|
10
|
+
Fabricator(:rounded_upto_3_digits_pwm_by_pcm_with_pseudocount_1, from: :pwm) do
|
11
|
+
matrix [[-0.47, 0.118, 0.486, -0.47],[0.754, -2.079, -0.47, 0.118]]
|
12
|
+
end
|
13
|
+
|
14
|
+
Fabricator(:rounded_upto_3_digits_pwm_by_pcm_with_pseudocount_10, from: :pwm) do
|
15
|
+
matrix [[-0.194, 0.057, 0.258, -0.194],[0.425, -0.531, -0.194, 0.057]]
|
16
|
+
end
|
data/spec/parsers/parser_spec.rb
CHANGED
@@ -1,152 +1,152 @@
|
|
1
|
-
require_relative '../spec_helper'
|
2
|
-
require_relative '../../lib/bioinform/parsers/parser'
|
3
|
-
|
4
|
-
module Bioinform
|
5
|
-
describe Parser do
|
6
|
-
context '#initialize' do
|
7
|
-
it 'should accept an array correctly' do
|
8
|
-
Parser.new([[1,2,3,4],[5,6,7,8]]).parse.matrix.should == [[1,2,3,4],[5,6,7,8]]
|
9
|
-
end
|
10
|
-
it 'should treat several arguments as an array composed of them' do
|
11
|
-
Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
|
12
|
-
end
|
13
|
-
it 'should treat one Array of numbers as an Array(with 1 element) of Arrays' do
|
14
|
-
Parser.new([1,2,3,4]).parse.should == Parser.new([[1,2,3,4]]).parse
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
context '::parse!' do
|
19
|
-
it 'should behave like Parser.new(input).parse!' do
|
20
|
-
Parser.parse!([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse!
|
21
|
-
expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
context '::parse' do
|
26
|
-
it 'should behave like Parser.new(input).parse!' do
|
27
|
-
Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
|
28
|
-
Parser.parse([1,2,3],[4,5,6]).should be_nil
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
context '::choose' do
|
33
|
-
it 'should create parser of appropriate type' do
|
34
|
-
Parser.choose([[1,2,3,4],[5,6,7,8]]).should be_kind_of(Parser)
|
35
|
-
Parser.choose([[1,2,3,4],[5,6,7,8]]).input.should == [[1,2,3,4],[5,6,7,8]]
|
36
|
-
Parser.choose(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name').should be_kind_of(TrivialParser)
|
37
|
-
Parser.choose(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name').input.should == {matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}
|
38
|
-
Parser.choose("1 2 3 4\n5 6 7 8").should be_kind_of(StringParser)
|
39
|
-
Parser.choose("1 2 3 4\n5 6 7 8").input.should == "1 2 3 4\n5 6 7 8"
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
context '::split_on_motifs' do
|
44
|
-
it 'should be able to get a single PM' do
|
45
|
-
Parser.split_on_motifs([[1,2,3,4],[5,6,7,8]], PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:nil) ]
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
context '::normalize_hash_keys' do
|
50
|
-
it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
|
51
|
-
Parser.normalize_hash_keys( {a: 1, C: 2, 'g' => 3, 'T' => 4} ).should == {A: 1, C: 2, G: 3, T: 4}
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
context '::need_transpose?' do
|
56
|
-
it 'should point whether matrix have positions(need not be transposed -- false) or letters(true) as first index' do
|
57
|
-
Parser.need_tranpose?([[1,3,5,7], [2,4,6,8]]).should be_false
|
58
|
-
Parser.need_tranpose?([[1,2],[3,4],[5,6],[7,8]]).should be_true
|
59
|
-
end
|
60
|
-
end
|
61
|
-
context '::array_from_acgt_hash' do
|
62
|
-
it 'should convert hash of arrays to a transposed array of arrays' do
|
63
|
-
input = {A: [1,2,3], C: [2,3,4], G: [3,4,5], T: [4,5,6]}
|
64
|
-
Parser.array_from_acgt_hash(input).should == [[1,2,3], [2,3,4], [3,4,5], [4,5,6]].transpose
|
65
|
-
end
|
66
|
-
it 'should convert hash of numbers to an array of numbers' do
|
67
|
-
input = {A: 1, C: 2, G: 3, T: 4}
|
68
|
-
Parser.array_from_acgt_hash(input).should == [1,2,3,4]
|
69
|
-
end
|
70
|
-
it 'should process both symbolic and string keys, in both upcase and downcase' do
|
71
|
-
input_normal_keys = {A: 1, C: 2, G: 3, T: 4}
|
72
|
-
input_different_keys = {:A => 1, :c => 2, 'g' => 3, 'T' => 4}
|
73
|
-
Parser.array_from_acgt_hash(input_different_keys).should == Parser.array_from_acgt_hash(input_normal_keys)
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
context '::try_convert_to_array' do
|
78
|
-
it 'should not change array' do
|
79
|
-
inputs = []
|
80
|
-
inputs << [[1,2,3,4], [2,3,4,5], [3,4,5,6]]
|
81
|
-
inputs << [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}, {A:3, C:4, G:5, T:6}]
|
82
|
-
inputs.each do |input|
|
83
|
-
Parser.try_convert_to_array( input ).should == input
|
84
|
-
end
|
85
|
-
end
|
86
|
-
it 'should convert ACGT-Hashes to an array of positions (not letters)' do
|
87
|
-
Parser.try_convert_to_array( {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} ).should == [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
context '#parse' do
|
92
|
-
it 'should give the same result as #parse!' do
|
93
|
-
parser = Parser.new('stub parser')
|
94
|
-
parser.stub(:parse!).and_return('stub result')
|
95
|
-
parser.parse.should == 'stub result'
|
96
|
-
end
|
97
|
-
it 'should return nil if #parse! raised an exception' do
|
98
|
-
parser = Parser.new('stub parser')
|
99
|
-
parser.stub(:parse!).and_raise
|
100
|
-
parser.parse.should be_nil
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
good_cases = {
|
105
|
-
'Array Nx4' => {input: [[1,2,3,4],[5,6,7,8]],
|
106
|
-
result: Fabricate(:pm_unnamed) },
|
107
|
-
|
108
|
-
'Array 4xN' => {input: [[1,5],[2,6],[3,7],[4,8]],
|
109
|
-
result: Fabricate(:pm_unnamed) },
|
110
|
-
|
111
|
-
'Hash A,C,G,T => Arrays' => { input: {:A => [1,5], :c => [2,6],'g' => [3,7],'T' => [4,8]},
|
112
|
-
result: Fabricate(:pm_unnamed) },
|
113
|
-
|
114
|
-
'Hash array of hashes' => { input: [{:A => 1,:c => 2,'g' => 3,'T' => 4}, {:A => 5,:c => 6,'g' => 7,'T' => 8}],
|
115
|
-
result: Fabricate(:pm_unnamed) },
|
116
|
-
|
117
|
-
'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],
|
118
|
-
result: Fabricate(:pm_4x4_unnamed) },
|
119
|
-
|
120
|
-
'Hash A,C,G,T => 4-Arrays' => { input: {:A => [1,5,9,13], :c => [2,6,10,14],'g' => [3,7,11,15],'T' => [4,8,12,16]},
|
121
|
-
result: Fabricate(:pm_4x4_unnamed) },
|
122
|
-
|
123
|
-
'4-Arrays of A,C,G,T hashes' => { input: [{:A => 1, :c => 2, 'g' => 3, 'T' => 4},
|
124
|
-
{:A => 5, :c => 6, 'g' => 7, 'T' => 8},
|
125
|
-
{:A => 9, :c => 10, 'g' => 11, 'T' => 12},
|
126
|
-
{:A => 13, :c => 14, 'g' => 15, 'T' => 16}],
|
127
|
-
result: Fabricate(:pm_4x4_unnamed) }
|
128
|
-
}
|
129
|
-
|
130
|
-
bad_cases = {
|
131
|
-
'Nil object on input' => {input: nil},
|
132
|
-
'Empty array on input' => {input: []},
|
133
|
-
'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
|
134
|
-
'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
|
135
|
-
'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
|
136
|
-
'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12}] },
|
137
|
-
'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
|
138
|
-
'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
|
139
|
-
'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
|
140
|
-
'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
|
141
|
-
'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
|
142
|
-
'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
|
143
|
-
}
|
144
|
-
|
145
|
-
parser_specs(Parser, good_cases, bad_cases)
|
146
|
-
context '#parser!' do
|
147
|
-
it "should raise an exception on parsing empty list to parser" do
|
148
|
-
expect{ Parser.new().parse! }.to raise_error
|
149
|
-
end
|
150
|
-
end
|
151
|
-
end
|
152
|
-
end
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require_relative '../../lib/bioinform/parsers/parser'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
describe Parser do
|
6
|
+
context '#initialize' do
|
7
|
+
it 'should accept an array correctly' do
|
8
|
+
Parser.new([[1,2,3,4],[5,6,7,8]]).parse.matrix.should == [[1,2,3,4],[5,6,7,8]]
|
9
|
+
end
|
10
|
+
it 'should treat several arguments as an array composed of them' do
|
11
|
+
Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
|
12
|
+
end
|
13
|
+
it 'should treat one Array of numbers as an Array(with 1 element) of Arrays' do
|
14
|
+
Parser.new([1,2,3,4]).parse.should == Parser.new([[1,2,3,4]]).parse
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
context '::parse!' do
|
19
|
+
it 'should behave like Parser.new(input).parse!' do
|
20
|
+
Parser.parse!([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse!
|
21
|
+
expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
context '::parse' do
|
26
|
+
it 'should behave like Parser.new(input).parse!' do
|
27
|
+
Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
|
28
|
+
Parser.parse([1,2,3],[4,5,6]).should be_nil
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
context '::choose' do
|
33
|
+
it 'should create parser of appropriate type' do
|
34
|
+
Parser.choose([[1,2,3,4],[5,6,7,8]]).should be_kind_of(Parser)
|
35
|
+
Parser.choose([[1,2,3,4],[5,6,7,8]]).input.should == [[1,2,3,4],[5,6,7,8]]
|
36
|
+
Parser.choose(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name').should be_kind_of(TrivialParser)
|
37
|
+
Parser.choose(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name').input.should == {matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}
|
38
|
+
Parser.choose("1 2 3 4\n5 6 7 8").should be_kind_of(StringParser)
|
39
|
+
Parser.choose("1 2 3 4\n5 6 7 8").input.should == "1 2 3 4\n5 6 7 8"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
context '::split_on_motifs' do
|
44
|
+
it 'should be able to get a single PM' do
|
45
|
+
Parser.split_on_motifs([[1,2,3,4],[5,6,7,8]], PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:nil) ]
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
context '::normalize_hash_keys' do
|
50
|
+
it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
|
51
|
+
Parser.normalize_hash_keys( {a: 1, C: 2, 'g' => 3, 'T' => 4} ).should == {A: 1, C: 2, G: 3, T: 4}
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
context '::need_transpose?' do
|
56
|
+
it 'should point whether matrix have positions(need not be transposed -- false) or letters(true) as first index' do
|
57
|
+
Parser.need_tranpose?([[1,3,5,7], [2,4,6,8]]).should be_false
|
58
|
+
Parser.need_tranpose?([[1,2],[3,4],[5,6],[7,8]]).should be_true
|
59
|
+
end
|
60
|
+
end
|
61
|
+
context '::array_from_acgt_hash' do
|
62
|
+
it 'should convert hash of arrays to a transposed array of arrays' do
|
63
|
+
input = {A: [1,2,3], C: [2,3,4], G: [3,4,5], T: [4,5,6]}
|
64
|
+
Parser.array_from_acgt_hash(input).should == [[1,2,3], [2,3,4], [3,4,5], [4,5,6]].transpose
|
65
|
+
end
|
66
|
+
it 'should convert hash of numbers to an array of numbers' do
|
67
|
+
input = {A: 1, C: 2, G: 3, T: 4}
|
68
|
+
Parser.array_from_acgt_hash(input).should == [1,2,3,4]
|
69
|
+
end
|
70
|
+
it 'should process both symbolic and string keys, in both upcase and downcase' do
|
71
|
+
input_normal_keys = {A: 1, C: 2, G: 3, T: 4}
|
72
|
+
input_different_keys = {:A => 1, :c => 2, 'g' => 3, 'T' => 4}
|
73
|
+
Parser.array_from_acgt_hash(input_different_keys).should == Parser.array_from_acgt_hash(input_normal_keys)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
context '::try_convert_to_array' do
|
78
|
+
it 'should not change array' do
|
79
|
+
inputs = []
|
80
|
+
inputs << [[1,2,3,4], [2,3,4,5], [3,4,5,6]]
|
81
|
+
inputs << [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}, {A:3, C:4, G:5, T:6}]
|
82
|
+
inputs.each do |input|
|
83
|
+
Parser.try_convert_to_array( input ).should == input
|
84
|
+
end
|
85
|
+
end
|
86
|
+
it 'should convert ACGT-Hashes to an array of positions (not letters)' do
|
87
|
+
Parser.try_convert_to_array( {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} ).should == [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
context '#parse' do
|
92
|
+
it 'should give the same result as #parse!' do
|
93
|
+
parser = Parser.new('stub parser')
|
94
|
+
parser.stub(:parse!).and_return('stub result')
|
95
|
+
parser.parse.should == 'stub result'
|
96
|
+
end
|
97
|
+
it 'should return nil if #parse! raised an exception' do
|
98
|
+
parser = Parser.new('stub parser')
|
99
|
+
parser.stub(:parse!).and_raise
|
100
|
+
parser.parse.should be_nil
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
good_cases = {
|
105
|
+
'Array Nx4' => {input: [[1,2,3,4],[5,6,7,8]],
|
106
|
+
result: Fabricate(:pm_unnamed) },
|
107
|
+
|
108
|
+
'Array 4xN' => {input: [[1,5],[2,6],[3,7],[4,8]],
|
109
|
+
result: Fabricate(:pm_unnamed) },
|
110
|
+
|
111
|
+
'Hash A,C,G,T => Arrays' => { input: {:A => [1,5], :c => [2,6],'g' => [3,7],'T' => [4,8]},
|
112
|
+
result: Fabricate(:pm_unnamed) },
|
113
|
+
|
114
|
+
'Hash array of hashes' => { input: [{:A => 1,:c => 2,'g' => 3,'T' => 4}, {:A => 5,:c => 6,'g' => 7,'T' => 8}],
|
115
|
+
result: Fabricate(:pm_unnamed) },
|
116
|
+
|
117
|
+
'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],
|
118
|
+
result: Fabricate(:pm_4x4_unnamed) },
|
119
|
+
|
120
|
+
'Hash A,C,G,T => 4-Arrays' => { input: {:A => [1,5,9,13], :c => [2,6,10,14],'g' => [3,7,11,15],'T' => [4,8,12,16]},
|
121
|
+
result: Fabricate(:pm_4x4_unnamed) },
|
122
|
+
|
123
|
+
'4-Arrays of A,C,G,T hashes' => { input: [{:A => 1, :c => 2, 'g' => 3, 'T' => 4},
|
124
|
+
{:A => 5, :c => 6, 'g' => 7, 'T' => 8},
|
125
|
+
{:A => 9, :c => 10, 'g' => 11, 'T' => 12},
|
126
|
+
{:A => 13, :c => 14, 'g' => 15, 'T' => 16}],
|
127
|
+
result: Fabricate(:pm_4x4_unnamed) }
|
128
|
+
}
|
129
|
+
|
130
|
+
bad_cases = {
|
131
|
+
'Nil object on input' => {input: nil},
|
132
|
+
'Empty array on input' => {input: []},
|
133
|
+
'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
|
134
|
+
'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
|
135
|
+
'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
|
136
|
+
'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12}] },
|
137
|
+
'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
|
138
|
+
'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
|
139
|
+
'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
|
140
|
+
'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
|
141
|
+
'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
|
142
|
+
'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
|
143
|
+
}
|
144
|
+
|
145
|
+
parser_specs(Parser, good_cases, bad_cases)
|
146
|
+
context '#parser!' do
|
147
|
+
it "should raise an exception on parsing empty list to parser" do
|
148
|
+
expect{ Parser.new().parse! }.to raise_error
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
@@ -1,70 +1,70 @@
|
|
1
|
-
require_relative '../spec_helper'
|
2
|
-
require_relative '../../lib/bioinform/parsers/string_fantom_parser'
|
3
|
-
|
4
|
-
module Bioinform
|
5
|
-
describe StringFantomParser do
|
6
|
-
describe '#split_on_motifs' do
|
7
|
-
it 'should be able to parse several motifs' do
|
8
|
-
input = "
|
9
|
-
//
|
10
|
-
NA motif_1
|
11
|
-
P0 A C G T
|
12
|
-
P1 0 1 2 3
|
13
|
-
P2 4 5 6 7
|
14
|
-
//
|
15
|
-
//
|
16
|
-
NA motif_2
|
17
|
-
P0 A C G T
|
18
|
-
P1 1 2 3 4
|
19
|
-
P2 5 6 7 8
|
20
|
-
P3 9 10 11 12
|
21
|
-
//
|
22
|
-
NA motif_3
|
23
|
-
P0 A C G T
|
24
|
-
P1 2 3 4 5
|
25
|
-
P2 6 7 8 9"
|
26
|
-
StringFantomParser.split_on_motifs(input).should == [ Fabricate(:pm_1), Fabricate(:pm_2), Fabricate(:pm_3) ]
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
good_cases = {
|
31
|
-
'string in Fantom-format' => {input: "
|
32
|
-
NA PM_name
|
33
|
-
P0 A C G T
|
34
|
-
P1 1 2 3 4
|
35
|
-
P2 5 6 7 8",
|
36
|
-
result: Fabricate(:pm)
|
37
|
-
},
|
38
|
-
|
39
|
-
'motif with additional rows' => {input: "
|
40
|
-
NA PM_name
|
41
|
-
P0 A C G T S P
|
42
|
-
P1 1 2 3 4 5 10
|
43
|
-
P2 5 6 7 8 5 11",
|
44
|
-
result: Fabricate(:pm)
|
45
|
-
},
|
46
|
-
|
47
|
-
'string with more than 10 positions(2-digit row numbers)' => {input: "
|
48
|
-
NA PM_name
|
49
|
-
P0 A C G T
|
50
|
-
P1 1 2 3 4
|
51
|
-
P2 5 6 7 8
|
52
|
-
P3 1 2 3 4
|
53
|
-
P4 5 6 7 8
|
54
|
-
P5 1 2 3 4
|
55
|
-
P6 5 6 7 8
|
56
|
-
P7 1 2 3 4
|
57
|
-
P8 5 6 7 8
|
58
|
-
P9 1 2 3 4
|
59
|
-
P10 5 6 7 8
|
60
|
-
P11 1 2 3 4
|
61
|
-
P12 5 6 7 8",
|
62
|
-
result: Fabricate(:pm, matrix: [[1,2,3,4],[5,6,7,8]] * 6 )
|
63
|
-
}
|
64
|
-
}
|
65
|
-
|
66
|
-
bad_cases = { }
|
67
|
-
|
68
|
-
parser_specs(StringFantomParser, good_cases, bad_cases)
|
69
|
-
end
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require_relative '../../lib/bioinform/parsers/string_fantom_parser'
|
3
|
+
|
4
|
+
module Bioinform
|
5
|
+
describe StringFantomParser do
|
6
|
+
describe '#split_on_motifs' do
|
7
|
+
it 'should be able to parse several motifs' do
|
8
|
+
input = "
|
9
|
+
//
|
10
|
+
NA motif_1
|
11
|
+
P0 A C G T
|
12
|
+
P1 0 1 2 3
|
13
|
+
P2 4 5 6 7
|
14
|
+
//
|
15
|
+
//
|
16
|
+
NA motif_2
|
17
|
+
P0 A C G T
|
18
|
+
P1 1 2 3 4
|
19
|
+
P2 5 6 7 8
|
20
|
+
P3 9 10 11 12
|
21
|
+
//
|
22
|
+
NA motif_3
|
23
|
+
P0 A C G T
|
24
|
+
P1 2 3 4 5
|
25
|
+
P2 6 7 8 9"
|
26
|
+
StringFantomParser.split_on_motifs(input).should == [ Fabricate(:pm_1), Fabricate(:pm_2), Fabricate(:pm_3) ]
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
good_cases = {
|
31
|
+
'string in Fantom-format' => {input: "
|
32
|
+
NA PM_name
|
33
|
+
P0 A C G T
|
34
|
+
P1 1 2 3 4
|
35
|
+
P2 5 6 7 8",
|
36
|
+
result: Fabricate(:pm)
|
37
|
+
},
|
38
|
+
|
39
|
+
'motif with additional rows' => {input: "
|
40
|
+
NA PM_name
|
41
|
+
P0 A C G T S P
|
42
|
+
P1 1 2 3 4 5 10
|
43
|
+
P2 5 6 7 8 5 11",
|
44
|
+
result: Fabricate(:pm)
|
45
|
+
},
|
46
|
+
|
47
|
+
'string with more than 10 positions(2-digit row numbers)' => {input: "
|
48
|
+
NA PM_name
|
49
|
+
P0 A C G T
|
50
|
+
P1 1 2 3 4
|
51
|
+
P2 5 6 7 8
|
52
|
+
P3 1 2 3 4
|
53
|
+
P4 5 6 7 8
|
54
|
+
P5 1 2 3 4
|
55
|
+
P6 5 6 7 8
|
56
|
+
P7 1 2 3 4
|
57
|
+
P8 5 6 7 8
|
58
|
+
P9 1 2 3 4
|
59
|
+
P10 5 6 7 8
|
60
|
+
P11 1 2 3 4
|
61
|
+
P12 5 6 7 8",
|
62
|
+
result: Fabricate(:pm, matrix: [[1,2,3,4],[5,6,7,8]] * 6 )
|
63
|
+
}
|
64
|
+
}
|
65
|
+
|
66
|
+
bad_cases = { }
|
67
|
+
|
68
|
+
parser_specs(StringFantomParser, good_cases, bad_cases)
|
69
|
+
end
|
70
70
|
end
|