bioinform 0.1.12 → 0.1.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +17 -17
  3. data/Gemfile +16 -16
  4. data/LICENSE +21 -21
  5. data/README.md +35 -35
  6. data/Rakefile +4 -4
  7. data/TODO.txt +37 -37
  8. data/bin/merge_into_collection +3 -3
  9. data/bin/pcm2pwm +3 -3
  10. data/bin/split_motifs +3 -3
  11. data/bioinform.gemspec +19 -19
  12. data/lib/bioinform/cli/convert_motif.rb +107 -107
  13. data/lib/bioinform/cli/merge_into_collection.rb +79 -79
  14. data/lib/bioinform/cli/pcm2pwm.rb +46 -46
  15. data/lib/bioinform/cli/split_motifs.rb +46 -46
  16. data/lib/bioinform/cli.rb +29 -29
  17. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +18 -18
  18. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +19 -19
  19. data/lib/bioinform/data_models/collection.rb +74 -74
  20. data/lib/bioinform/data_models/motif.rb +55 -55
  21. data/lib/bioinform/data_models/pcm.rb +23 -23
  22. data/lib/bioinform/data_models/pm.rb +169 -169
  23. data/lib/bioinform/data_models/ppm.rb +9 -9
  24. data/lib/bioinform/data_models/pwm.rb +55 -55
  25. data/lib/bioinform/data_models.rb +10 -10
  26. data/lib/bioinform/formatters/raw_formatter.rb +40 -40
  27. data/lib/bioinform/formatters/transfac_formatter.rb +38 -38
  28. data/lib/bioinform/formatters.rb +1 -1
  29. data/lib/bioinform/parsers/jaspar_parser.rb +34 -34
  30. data/lib/bioinform/parsers/parser.rb +87 -87
  31. data/lib/bioinform/parsers/splittable_parser.rb +56 -56
  32. data/lib/bioinform/parsers/string_fantom_parser.rb +34 -34
  33. data/lib/bioinform/parsers/string_parser.rb +71 -71
  34. data/lib/bioinform/parsers/trivial_parser.rb +33 -33
  35. data/lib/bioinform/parsers/yaml_parser.rb +34 -34
  36. data/lib/bioinform/parsers.rb +6 -6
  37. data/lib/bioinform/support/array_product.rb +5 -5
  38. data/lib/bioinform/support/array_zip.rb +5 -5
  39. data/lib/bioinform/support/collect_hash.rb +6 -6
  40. data/lib/bioinform/support/deep_dup.rb +4 -4
  41. data/lib/bioinform/support/delete_many.rb +13 -13
  42. data/lib/bioinform/support/inverf.rb +12 -12
  43. data/lib/bioinform/support/multiline_squish.rb +5 -5
  44. data/lib/bioinform/support/parameters.rb +27 -27
  45. data/lib/bioinform/support/partial_sums.rb +15 -15
  46. data/lib/bioinform/support/same_by.rb +12 -12
  47. data/lib/bioinform/support/strip_doc.rb +8 -8
  48. data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +3 -0
  49. data/lib/bioinform/support.rb +17 -17
  50. data/lib/bioinform/version.rb +3 -3
  51. data/lib/bioinform.rb +10 -10
  52. data/spec/cli/cli_spec.rb +13 -13
  53. data/spec/cli/convert_motif_spec.rb +106 -106
  54. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -14
  55. data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +11 -11
  56. data/spec/cli/data/merge_into_collection/SP1_f1.pwm +12 -12
  57. data/spec/cli/data/merge_into_collection/collection.txt.result +40 -40
  58. data/spec/cli/data/merge_into_collection/collection.yaml.result +188 -188
  59. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +188 -188
  60. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -14
  61. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -11
  62. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -12
  63. data/spec/cli/data/pcm2pwm/KLF4 f2 spaced name.pcm +11 -11
  64. data/spec/cli/data/pcm2pwm/KLF4_f2.pcm +11 -11
  65. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -11
  66. data/spec/cli/data/pcm2pwm/SP1_f1.pcm +12 -12
  67. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -12
  68. data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -14
  69. data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -11
  70. data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -12
  71. data/spec/cli/data/split_motifs/collection.yaml +188 -188
  72. data/spec/cli/data/split_motifs/plain_collection.txt +38 -38
  73. data/spec/cli/merge_into_collection_spec.rb +99 -99
  74. data/spec/cli/pcm2pwm_spec.rb +79 -79
  75. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +17 -17
  76. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +14 -14
  77. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +49 -49
  78. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +4 -4
  79. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
  80. data/spec/cli/split_motifs_spec.rb +76 -76
  81. data/spec/data_models/collection_spec.rb +97 -97
  82. data/spec/data_models/motif_spec.rb +223 -223
  83. data/spec/data_models/pcm_spec.rb +55 -55
  84. data/spec/data_models/pm_spec.rb +359 -359
  85. data/spec/data_models/ppm_spec.rb +7 -7
  86. data/spec/data_models/pwm_spec.rb +82 -82
  87. data/spec/fabricators/collection_fabricator.rb +7 -7
  88. data/spec/fabricators/motif_fabricator.rb +32 -32
  89. data/spec/fabricators/motif_formats_fabricator.rb +124 -124
  90. data/spec/fabricators/pcm_fabricator.rb +24 -24
  91. data/spec/fabricators/pm_fabricator.rb +51 -51
  92. data/spec/fabricators/ppm_fabricator.rb +13 -13
  93. data/spec/fabricators/pwm_fabricator.rb +16 -16
  94. data/spec/parsers/parser_spec.rb +152 -152
  95. data/spec/parsers/string_fantom_parser_spec.rb +69 -69
  96. data/spec/parsers/string_parser_spec.rb +76 -76
  97. data/spec/parsers/trivial_parser_spec.rb +63 -63
  98. data/spec/parsers/yaml_parser_spec.rb +50 -50
  99. data/spec/spec_helper.rb +10 -10
  100. data/spec/spec_helper_source.rb +59 -59
  101. data/spec/support/advanced_scan_spec.rb +31 -31
  102. data/spec/support/array_product_spec.rb +14 -14
  103. data/spec/support/array_zip_spec.rb +14 -14
  104. data/spec/support/collect_hash_spec.rb +14 -14
  105. data/spec/support/delete_many_spec.rb +43 -43
  106. data/spec/support/inverf_spec.rb +18 -18
  107. data/spec/support/multiline_squish_spec.rb +24 -24
  108. data/spec/support/partial_sums_spec.rb +30 -30
  109. data/spec/support/same_by_spec.rb +35 -35
  110. metadata +3 -3
@@ -1,52 +1,52 @@
1
- Fabricator(:pm, class_name: Bioinform::PM) do
2
- initialize_with{ Bioinform::PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'PM_name') }
3
- end
4
-
5
- Fabricator(:pm_unnamed, from: :pm) do
6
- name nil
7
- end
8
-
9
-
10
- Fabricator(:pm_first, from: :pm) do
11
- name 'PM_first'
12
- end
13
-
14
- Fabricator(:pm_second, from: :pm) do
15
- matrix [[15,16,17,18],[11,21,31,41]]
16
- name 'PM_second'
17
- end
18
-
19
-
20
- Fabricator(:pm_4x4, from: :pm) do
21
- matrix [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]]
22
- end
23
-
24
- Fabricator(:pm_4x4_unnamed, from: :pm_4x4) do
25
- name nil
26
- end
27
-
28
- Fabricator(:pm_with_floats, from: :pm_unnamed) do
29
- matrix [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]
30
- end
31
-
32
- Fabricator(:pm_1, from: :pm) do
33
- matrix [[0,1,2,3],[4,5,6,7]]
34
- name 'motif_1'
35
- end
36
- Fabricator(:pm_2, from: :pm) do
37
- matrix [[1,2,3,4],[5,6,7,8],[9,10,11,12]]
38
- name 'motif_2'
39
- end
40
- Fabricator(:pm_3, from: :pm) do
41
- matrix [[2,3,4,5],[6,7,8,9]]
42
- name 'motif_3'
43
- end
44
-
45
- Fabricator(:pm_4,from: :pm) do
46
- matrix [[1,0,1,0],[0,0,0,0],[1,2,3,4]]
47
- name 'pm 4'
48
- end
49
- Fabricator(:pm_5, from: :pm) do
50
- matrix [[1,2,1,2],[0,3,6,9],[1,2,3,4]]
51
- name 'pm 5'
1
+ Fabricator(:pm, class_name: Bioinform::PM) do
2
+ initialize_with{ Bioinform::PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'PM_name') }
3
+ end
4
+
5
+ Fabricator(:pm_unnamed, from: :pm) do
6
+ name nil
7
+ end
8
+
9
+
10
+ Fabricator(:pm_first, from: :pm) do
11
+ name 'PM_first'
12
+ end
13
+
14
+ Fabricator(:pm_second, from: :pm) do
15
+ matrix [[15,16,17,18],[11,21,31,41]]
16
+ name 'PM_second'
17
+ end
18
+
19
+
20
+ Fabricator(:pm_4x4, from: :pm) do
21
+ matrix [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]]
22
+ end
23
+
24
+ Fabricator(:pm_4x4_unnamed, from: :pm_4x4) do
25
+ name nil
26
+ end
27
+
28
+ Fabricator(:pm_with_floats, from: :pm_unnamed) do
29
+ matrix [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]
30
+ end
31
+
32
+ Fabricator(:pm_1, from: :pm) do
33
+ matrix [[0,1,2,3],[4,5,6,7]]
34
+ name 'motif_1'
35
+ end
36
+ Fabricator(:pm_2, from: :pm) do
37
+ matrix [[1,2,3,4],[5,6,7,8],[9,10,11,12]]
38
+ name 'motif_2'
39
+ end
40
+ Fabricator(:pm_3, from: :pm) do
41
+ matrix [[2,3,4,5],[6,7,8,9]]
42
+ name 'motif_3'
43
+ end
44
+
45
+ Fabricator(:pm_4,from: :pm) do
46
+ matrix [[1,0,1,0],[0,0,0,0],[1,2,3,4]]
47
+ name 'pm 4'
48
+ end
49
+ Fabricator(:pm_5, from: :pm) do
50
+ matrix [[1,2,1,2],[0,3,6,9],[1,2,3,4]]
51
+ name 'pm 5'
52
52
  end
@@ -1,14 +1,14 @@
1
- Fabricator(:ppm, class_name: Bioinform::PPM) do
2
- initialize_with{ Bioinform::PPM.new(matrix: [[0.2, 0.3, 0.3, 0.2],[0.7, 0.2, 0.0, 0.1]]) }
3
- name 'PPM_name'
4
- end
5
-
6
- # It has the same name as original pcm because PCM#to_ppm doesn't change the name
7
- Fabricator(:ppm_by_pcm, class_name: Bioinform::PPM) do
8
- initialize_with{ Fabricate(:pcm).to_ppm }
9
- end
10
-
11
- Fabricator(:ppm_pcm_divided_by_count, from: :ppm) do
12
- # this matrix should be initialized manually - it's used for spec checking PCM#to_ppm
13
- matrix [[1.0/7.0, 2.0/7.0, 3.0/7.0, 1.0/7.0], [4.0/7.0, 0.0/7.0, 1.0/7.0, 2.0/7.0]]
1
+ Fabricator(:ppm, class_name: Bioinform::PPM) do
2
+ initialize_with{ Bioinform::PPM.new(matrix: [[0.2, 0.3, 0.3, 0.2],[0.7, 0.2, 0.0, 0.1]]) }
3
+ name 'PPM_name'
4
+ end
5
+
6
+ # It has the same name as original pcm because PCM#to_ppm doesn't change the name
7
+ Fabricator(:ppm_by_pcm, class_name: Bioinform::PPM) do
8
+ initialize_with{ Fabricate(:pcm).to_ppm }
9
+ end
10
+
11
+ Fabricator(:ppm_pcm_divided_by_count, from: :ppm) do
12
+ # this matrix should be initialized manually - it's used for spec checking PCM#to_ppm
13
+ matrix [[1.0/7.0, 2.0/7.0, 3.0/7.0, 1.0/7.0], [4.0/7.0, 0.0/7.0, 1.0/7.0, 2.0/7.0]]
14
14
  end
@@ -1,16 +1,16 @@
1
- Fabricator(:pwm, class_name: Bioinform::PWM) do
2
- initialize_with{ Bioinform::PWM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'PWM_name')}
3
- end
4
-
5
- # It has name 'PCM_name' because name isn't converted during #to_pwm
6
- Fabricator(:pwm_by_pcm, class_name: Bioinform::PWM) do
7
- initialize_with{ Fabricate(:pcm).to_pwm }
8
- end
9
-
10
- Fabricator(:rounded_upto_3_digits_pwm_by_pcm_with_pseudocount_1, from: :pwm) do
11
- matrix [[-0.47, 0.118, 0.486, -0.47],[0.754, -2.079, -0.47, 0.118]]
12
- end
13
-
14
- Fabricator(:rounded_upto_3_digits_pwm_by_pcm_with_pseudocount_10, from: :pwm) do
15
- matrix [[-0.194, 0.057, 0.258, -0.194],[0.425, -0.531, -0.194, 0.057]]
16
- end
1
+ Fabricator(:pwm, class_name: Bioinform::PWM) do
2
+ initialize_with{ Bioinform::PWM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'PWM_name')}
3
+ end
4
+
5
+ # It has name 'PCM_name' because name isn't converted during #to_pwm
6
+ Fabricator(:pwm_by_pcm, class_name: Bioinform::PWM) do
7
+ initialize_with{ Fabricate(:pcm).to_pwm }
8
+ end
9
+
10
+ Fabricator(:rounded_upto_3_digits_pwm_by_pcm_with_pseudocount_1, from: :pwm) do
11
+ matrix [[-0.47, 0.118, 0.486, -0.47],[0.754, -2.079, -0.47, 0.118]]
12
+ end
13
+
14
+ Fabricator(:rounded_upto_3_digits_pwm_by_pcm_with_pseudocount_10, from: :pwm) do
15
+ matrix [[-0.194, 0.057, 0.258, -0.194],[0.425, -0.531, -0.194, 0.057]]
16
+ end
@@ -1,152 +1,152 @@
1
- require_relative '../spec_helper'
2
- require_relative '../../lib/bioinform/parsers/parser'
3
-
4
- module Bioinform
5
- describe Parser do
6
- context '#initialize' do
7
- it 'should accept an array correctly' do
8
- Parser.new([[1,2,3,4],[5,6,7,8]]).parse.matrix.should == [[1,2,3,4],[5,6,7,8]]
9
- end
10
- it 'should treat several arguments as an array composed of them' do
11
- Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
12
- end
13
- it 'should treat one Array of numbers as an Array(with 1 element) of Arrays' do
14
- Parser.new([1,2,3,4]).parse.should == Parser.new([[1,2,3,4]]).parse
15
- end
16
- end
17
-
18
- context '::parse!' do
19
- it 'should behave like Parser.new(input).parse!' do
20
- Parser.parse!([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse!
21
- expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
22
- end
23
- end
24
-
25
- context '::parse' do
26
- it 'should behave like Parser.new(input).parse!' do
27
- Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
28
- Parser.parse([1,2,3],[4,5,6]).should be_nil
29
- end
30
- end
31
-
32
- context '::choose' do
33
- it 'should create parser of appropriate type' do
34
- Parser.choose([[1,2,3,4],[5,6,7,8]]).should be_kind_of(Parser)
35
- Parser.choose([[1,2,3,4],[5,6,7,8]]).input.should == [[1,2,3,4],[5,6,7,8]]
36
- Parser.choose(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name').should be_kind_of(TrivialParser)
37
- Parser.choose(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name').input.should == {matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}
38
- Parser.choose("1 2 3 4\n5 6 7 8").should be_kind_of(StringParser)
39
- Parser.choose("1 2 3 4\n5 6 7 8").input.should == "1 2 3 4\n5 6 7 8"
40
- end
41
- end
42
-
43
- context '::split_on_motifs' do
44
- it 'should be able to get a single PM' do
45
- Parser.split_on_motifs([[1,2,3,4],[5,6,7,8]], PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:nil) ]
46
- end
47
- end
48
-
49
- context '::normalize_hash_keys' do
50
- it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
51
- Parser.normalize_hash_keys( {a: 1, C: 2, 'g' => 3, 'T' => 4} ).should == {A: 1, C: 2, G: 3, T: 4}
52
- end
53
- end
54
-
55
- context '::need_transpose?' do
56
- it 'should point whether matrix have positions(need not be transposed -- false) or letters(true) as first index' do
57
- Parser.need_tranpose?([[1,3,5,7], [2,4,6,8]]).should be_false
58
- Parser.need_tranpose?([[1,2],[3,4],[5,6],[7,8]]).should be_true
59
- end
60
- end
61
- context '::array_from_acgt_hash' do
62
- it 'should convert hash of arrays to a transposed array of arrays' do
63
- input = {A: [1,2,3], C: [2,3,4], G: [3,4,5], T: [4,5,6]}
64
- Parser.array_from_acgt_hash(input).should == [[1,2,3], [2,3,4], [3,4,5], [4,5,6]].transpose
65
- end
66
- it 'should convert hash of numbers to an array of numbers' do
67
- input = {A: 1, C: 2, G: 3, T: 4}
68
- Parser.array_from_acgt_hash(input).should == [1,2,3,4]
69
- end
70
- it 'should process both symbolic and string keys, in both upcase and downcase' do
71
- input_normal_keys = {A: 1, C: 2, G: 3, T: 4}
72
- input_different_keys = {:A => 1, :c => 2, 'g' => 3, 'T' => 4}
73
- Parser.array_from_acgt_hash(input_different_keys).should == Parser.array_from_acgt_hash(input_normal_keys)
74
- end
75
- end
76
-
77
- context '::try_convert_to_array' do
78
- it 'should not change array' do
79
- inputs = []
80
- inputs << [[1,2,3,4], [2,3,4,5], [3,4,5,6]]
81
- inputs << [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}, {A:3, C:4, G:5, T:6}]
82
- inputs.each do |input|
83
- Parser.try_convert_to_array( input ).should == input
84
- end
85
- end
86
- it 'should convert ACGT-Hashes to an array of positions (not letters)' do
87
- Parser.try_convert_to_array( {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} ).should == [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
88
- end
89
- end
90
-
91
- context '#parse' do
92
- it 'should give the same result as #parse!' do
93
- parser = Parser.new('stub parser')
94
- parser.stub(:parse!).and_return('stub result')
95
- parser.parse.should == 'stub result'
96
- end
97
- it 'should return nil if #parse! raised an exception' do
98
- parser = Parser.new('stub parser')
99
- parser.stub(:parse!).and_raise
100
- parser.parse.should be_nil
101
- end
102
- end
103
-
104
- good_cases = {
105
- 'Array Nx4' => {input: [[1,2,3,4],[5,6,7,8]],
106
- result: Fabricate(:pm_unnamed) },
107
-
108
- 'Array 4xN' => {input: [[1,5],[2,6],[3,7],[4,8]],
109
- result: Fabricate(:pm_unnamed) },
110
-
111
- 'Hash A,C,G,T => Arrays' => { input: {:A => [1,5], :c => [2,6],'g' => [3,7],'T' => [4,8]},
112
- result: Fabricate(:pm_unnamed) },
113
-
114
- 'Hash array of hashes' => { input: [{:A => 1,:c => 2,'g' => 3,'T' => 4}, {:A => 5,:c => 6,'g' => 7,'T' => 8}],
115
- result: Fabricate(:pm_unnamed) },
116
-
117
- 'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],
118
- result: Fabricate(:pm_4x4_unnamed) },
119
-
120
- 'Hash A,C,G,T => 4-Arrays' => { input: {:A => [1,5,9,13], :c => [2,6,10,14],'g' => [3,7,11,15],'T' => [4,8,12,16]},
121
- result: Fabricate(:pm_4x4_unnamed) },
122
-
123
- '4-Arrays of A,C,G,T hashes' => { input: [{:A => 1, :c => 2, 'g' => 3, 'T' => 4},
124
- {:A => 5, :c => 6, 'g' => 7, 'T' => 8},
125
- {:A => 9, :c => 10, 'g' => 11, 'T' => 12},
126
- {:A => 13, :c => 14, 'g' => 15, 'T' => 16}],
127
- result: Fabricate(:pm_4x4_unnamed) }
128
- }
129
-
130
- bad_cases = {
131
- 'Nil object on input' => {input: nil},
132
- 'Empty array on input' => {input: []},
133
- 'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
134
- 'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
135
- 'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
136
- 'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12}] },
137
- 'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
138
- 'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
139
- 'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
140
- 'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
141
- 'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
142
- 'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
143
- }
144
-
145
- parser_specs(Parser, good_cases, bad_cases)
146
- context '#parser!' do
147
- it "should raise an exception on parsing empty list to parser" do
148
- expect{ Parser.new().parse! }.to raise_error
149
- end
150
- end
151
- end
152
- end
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/parsers/parser'
3
+
4
+ module Bioinform
5
+ describe Parser do
6
+ context '#initialize' do
7
+ it 'should accept an array correctly' do
8
+ Parser.new([[1,2,3,4],[5,6,7,8]]).parse.matrix.should == [[1,2,3,4],[5,6,7,8]]
9
+ end
10
+ it 'should treat several arguments as an array composed of them' do
11
+ Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
12
+ end
13
+ it 'should treat one Array of numbers as an Array(with 1 element) of Arrays' do
14
+ Parser.new([1,2,3,4]).parse.should == Parser.new([[1,2,3,4]]).parse
15
+ end
16
+ end
17
+
18
+ context '::parse!' do
19
+ it 'should behave like Parser.new(input).parse!' do
20
+ Parser.parse!([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse!
21
+ expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
22
+ end
23
+ end
24
+
25
+ context '::parse' do
26
+ it 'should behave like Parser.new(input).parse!' do
27
+ Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
28
+ Parser.parse([1,2,3],[4,5,6]).should be_nil
29
+ end
30
+ end
31
+
32
+ context '::choose' do
33
+ it 'should create parser of appropriate type' do
34
+ Parser.choose([[1,2,3,4],[5,6,7,8]]).should be_kind_of(Parser)
35
+ Parser.choose([[1,2,3,4],[5,6,7,8]]).input.should == [[1,2,3,4],[5,6,7,8]]
36
+ Parser.choose(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name').should be_kind_of(TrivialParser)
37
+ Parser.choose(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name').input.should == {matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}
38
+ Parser.choose("1 2 3 4\n5 6 7 8").should be_kind_of(StringParser)
39
+ Parser.choose("1 2 3 4\n5 6 7 8").input.should == "1 2 3 4\n5 6 7 8"
40
+ end
41
+ end
42
+
43
+ context '::split_on_motifs' do
44
+ it 'should be able to get a single PM' do
45
+ Parser.split_on_motifs([[1,2,3,4],[5,6,7,8]], PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:nil) ]
46
+ end
47
+ end
48
+
49
+ context '::normalize_hash_keys' do
50
+ it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
51
+ Parser.normalize_hash_keys( {a: 1, C: 2, 'g' => 3, 'T' => 4} ).should == {A: 1, C: 2, G: 3, T: 4}
52
+ end
53
+ end
54
+
55
+ context '::need_transpose?' do
56
+ it 'should point whether matrix have positions(need not be transposed -- false) or letters(true) as first index' do
57
+ Parser.need_tranpose?([[1,3,5,7], [2,4,6,8]]).should be_false
58
+ Parser.need_tranpose?([[1,2],[3,4],[5,6],[7,8]]).should be_true
59
+ end
60
+ end
61
+ context '::array_from_acgt_hash' do
62
+ it 'should convert hash of arrays to a transposed array of arrays' do
63
+ input = {A: [1,2,3], C: [2,3,4], G: [3,4,5], T: [4,5,6]}
64
+ Parser.array_from_acgt_hash(input).should == [[1,2,3], [2,3,4], [3,4,5], [4,5,6]].transpose
65
+ end
66
+ it 'should convert hash of numbers to an array of numbers' do
67
+ input = {A: 1, C: 2, G: 3, T: 4}
68
+ Parser.array_from_acgt_hash(input).should == [1,2,3,4]
69
+ end
70
+ it 'should process both symbolic and string keys, in both upcase and downcase' do
71
+ input_normal_keys = {A: 1, C: 2, G: 3, T: 4}
72
+ input_different_keys = {:A => 1, :c => 2, 'g' => 3, 'T' => 4}
73
+ Parser.array_from_acgt_hash(input_different_keys).should == Parser.array_from_acgt_hash(input_normal_keys)
74
+ end
75
+ end
76
+
77
+ context '::try_convert_to_array' do
78
+ it 'should not change array' do
79
+ inputs = []
80
+ inputs << [[1,2,3,4], [2,3,4,5], [3,4,5,6]]
81
+ inputs << [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}, {A:3, C:4, G:5, T:6}]
82
+ inputs.each do |input|
83
+ Parser.try_convert_to_array( input ).should == input
84
+ end
85
+ end
86
+ it 'should convert ACGT-Hashes to an array of positions (not letters)' do
87
+ Parser.try_convert_to_array( {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} ).should == [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
88
+ end
89
+ end
90
+
91
+ context '#parse' do
92
+ it 'should give the same result as #parse!' do
93
+ parser = Parser.new('stub parser')
94
+ parser.stub(:parse!).and_return('stub result')
95
+ parser.parse.should == 'stub result'
96
+ end
97
+ it 'should return nil if #parse! raised an exception' do
98
+ parser = Parser.new('stub parser')
99
+ parser.stub(:parse!).and_raise
100
+ parser.parse.should be_nil
101
+ end
102
+ end
103
+
104
+ good_cases = {
105
+ 'Array Nx4' => {input: [[1,2,3,4],[5,6,7,8]],
106
+ result: Fabricate(:pm_unnamed) },
107
+
108
+ 'Array 4xN' => {input: [[1,5],[2,6],[3,7],[4,8]],
109
+ result: Fabricate(:pm_unnamed) },
110
+
111
+ 'Hash A,C,G,T => Arrays' => { input: {:A => [1,5], :c => [2,6],'g' => [3,7],'T' => [4,8]},
112
+ result: Fabricate(:pm_unnamed) },
113
+
114
+ 'Hash array of hashes' => { input: [{:A => 1,:c => 2,'g' => 3,'T' => 4}, {:A => 5,:c => 6,'g' => 7,'T' => 8}],
115
+ result: Fabricate(:pm_unnamed) },
116
+
117
+ 'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],
118
+ result: Fabricate(:pm_4x4_unnamed) },
119
+
120
+ 'Hash A,C,G,T => 4-Arrays' => { input: {:A => [1,5,9,13], :c => [2,6,10,14],'g' => [3,7,11,15],'T' => [4,8,12,16]},
121
+ result: Fabricate(:pm_4x4_unnamed) },
122
+
123
+ '4-Arrays of A,C,G,T hashes' => { input: [{:A => 1, :c => 2, 'g' => 3, 'T' => 4},
124
+ {:A => 5, :c => 6, 'g' => 7, 'T' => 8},
125
+ {:A => 9, :c => 10, 'g' => 11, 'T' => 12},
126
+ {:A => 13, :c => 14, 'g' => 15, 'T' => 16}],
127
+ result: Fabricate(:pm_4x4_unnamed) }
128
+ }
129
+
130
+ bad_cases = {
131
+ 'Nil object on input' => {input: nil},
132
+ 'Empty array on input' => {input: []},
133
+ 'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
134
+ 'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
135
+ 'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
136
+ 'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12}] },
137
+ 'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
138
+ 'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
139
+ 'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
140
+ 'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
141
+ 'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
142
+ 'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
143
+ }
144
+
145
+ parser_specs(Parser, good_cases, bad_cases)
146
+ context '#parser!' do
147
+ it "should raise an exception on parsing empty list to parser" do
148
+ expect{ Parser.new().parse! }.to raise_error
149
+ end
150
+ end
151
+ end
152
+ end
@@ -1,70 +1,70 @@
1
- require_relative '../spec_helper'
2
- require_relative '../../lib/bioinform/parsers/string_fantom_parser'
3
-
4
- module Bioinform
5
- describe StringFantomParser do
6
- describe '#split_on_motifs' do
7
- it 'should be able to parse several motifs' do
8
- input = "
9
- //
10
- NA motif_1
11
- P0 A C G T
12
- P1 0 1 2 3
13
- P2 4 5 6 7
14
- //
15
- //
16
- NA motif_2
17
- P0 A C G T
18
- P1 1 2 3 4
19
- P2 5 6 7 8
20
- P3 9 10 11 12
21
- //
22
- NA motif_3
23
- P0 A C G T
24
- P1 2 3 4 5
25
- P2 6 7 8 9"
26
- StringFantomParser.split_on_motifs(input).should == [ Fabricate(:pm_1), Fabricate(:pm_2), Fabricate(:pm_3) ]
27
- end
28
- end
29
-
30
- good_cases = {
31
- 'string in Fantom-format' => {input: "
32
- NA PM_name
33
- P0 A C G T
34
- P1 1 2 3 4
35
- P2 5 6 7 8",
36
- result: Fabricate(:pm)
37
- },
38
-
39
- 'motif with additional rows' => {input: "
40
- NA PM_name
41
- P0 A C G T S P
42
- P1 1 2 3 4 5 10
43
- P2 5 6 7 8 5 11",
44
- result: Fabricate(:pm)
45
- },
46
-
47
- 'string with more than 10 positions(2-digit row numbers)' => {input: "
48
- NA PM_name
49
- P0 A C G T
50
- P1 1 2 3 4
51
- P2 5 6 7 8
52
- P3 1 2 3 4
53
- P4 5 6 7 8
54
- P5 1 2 3 4
55
- P6 5 6 7 8
56
- P7 1 2 3 4
57
- P8 5 6 7 8
58
- P9 1 2 3 4
59
- P10 5 6 7 8
60
- P11 1 2 3 4
61
- P12 5 6 7 8",
62
- result: Fabricate(:pm, matrix: [[1,2,3,4],[5,6,7,8]] * 6 )
63
- }
64
- }
65
-
66
- bad_cases = { }
67
-
68
- parser_specs(StringFantomParser, good_cases, bad_cases)
69
- end
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/parsers/string_fantom_parser'
3
+
4
+ module Bioinform
5
+ describe StringFantomParser do
6
+ describe '#split_on_motifs' do
7
+ it 'should be able to parse several motifs' do
8
+ input = "
9
+ //
10
+ NA motif_1
11
+ P0 A C G T
12
+ P1 0 1 2 3
13
+ P2 4 5 6 7
14
+ //
15
+ //
16
+ NA motif_2
17
+ P0 A C G T
18
+ P1 1 2 3 4
19
+ P2 5 6 7 8
20
+ P3 9 10 11 12
21
+ //
22
+ NA motif_3
23
+ P0 A C G T
24
+ P1 2 3 4 5
25
+ P2 6 7 8 9"
26
+ StringFantomParser.split_on_motifs(input).should == [ Fabricate(:pm_1), Fabricate(:pm_2), Fabricate(:pm_3) ]
27
+ end
28
+ end
29
+
30
+ good_cases = {
31
+ 'string in Fantom-format' => {input: "
32
+ NA PM_name
33
+ P0 A C G T
34
+ P1 1 2 3 4
35
+ P2 5 6 7 8",
36
+ result: Fabricate(:pm)
37
+ },
38
+
39
+ 'motif with additional rows' => {input: "
40
+ NA PM_name
41
+ P0 A C G T S P
42
+ P1 1 2 3 4 5 10
43
+ P2 5 6 7 8 5 11",
44
+ result: Fabricate(:pm)
45
+ },
46
+
47
+ 'string with more than 10 positions(2-digit row numbers)' => {input: "
48
+ NA PM_name
49
+ P0 A C G T
50
+ P1 1 2 3 4
51
+ P2 5 6 7 8
52
+ P3 1 2 3 4
53
+ P4 5 6 7 8
54
+ P5 1 2 3 4
55
+ P6 5 6 7 8
56
+ P7 1 2 3 4
57
+ P8 5 6 7 8
58
+ P9 1 2 3 4
59
+ P10 5 6 7 8
60
+ P11 1 2 3 4
61
+ P12 5 6 7 8",
62
+ result: Fabricate(:pm, matrix: [[1,2,3,4],[5,6,7,8]] * 6 )
63
+ }
64
+ }
65
+
66
+ bad_cases = { }
67
+
68
+ parser_specs(StringFantomParser, good_cases, bad_cases)
69
+ end
70
70
  end