bioinform 0.1.17 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -3
  3. data/LICENSE +0 -1
  4. data/README.md +1 -1
  5. data/TODO.txt +23 -30
  6. data/bin/convert_motif +4 -0
  7. data/bin/pcm2pwm +1 -1
  8. data/bin/split_motifs +1 -1
  9. data/bioinform.gemspec +0 -2
  10. data/lib/bioinform.rb +54 -16
  11. data/lib/bioinform/alphabet.rb +85 -0
  12. data/lib/bioinform/background.rb +90 -0
  13. data/lib/bioinform/cli.rb +1 -2
  14. data/lib/bioinform/cli/convert_motif.rb +52 -17
  15. data/lib/bioinform/cli/pcm2pwm.rb +32 -26
  16. data/lib/bioinform/cli/split_motifs.rb +31 -30
  17. data/lib/bioinform/conversion_algorithms.rb +6 -0
  18. data/lib/bioinform/conversion_algorithms/pcm2ppm_converter.rb +13 -11
  19. data/lib/bioinform/conversion_algorithms/pcm2pwm_converter.rb +39 -11
  20. data/lib/bioinform/conversion_algorithms/pcm2pwm_mara_converter.rb +26 -0
  21. data/lib/bioinform/conversion_algorithms/ppm2pcm_converter.rb +30 -0
  22. data/lib/bioinform/conversion_algorithms/pwm2iupac_pwm_converter.rb +23 -0
  23. data/lib/bioinform/conversion_algorithms/pwm2pcm_converter.rb +85 -0
  24. data/lib/bioinform/data_models.rb +1 -7
  25. data/lib/bioinform/data_models/named_model.rb +38 -0
  26. data/lib/bioinform/data_models/pcm.rb +18 -28
  27. data/lib/bioinform/data_models/pm.rb +73 -170
  28. data/lib/bioinform/data_models/ppm.rb +11 -24
  29. data/lib/bioinform/data_models/pwm.rb +30 -56
  30. data/lib/bioinform/errors.rb +17 -0
  31. data/lib/bioinform/formatters.rb +4 -2
  32. data/lib/bioinform/formatters/consensus_formatter.rb +35 -0
  33. data/lib/bioinform/formatters/motif_formatter.rb +69 -0
  34. data/lib/bioinform/formatters/pretty_matrix_formatter.rb +36 -0
  35. data/lib/bioinform/formatters/transfac_formatter.rb +29 -37
  36. data/lib/bioinform/parsers.rb +1 -8
  37. data/lib/bioinform/parsers/matrix_parser.rb +44 -36
  38. data/lib/bioinform/parsers/motif_splitter.rb +45 -0
  39. data/lib/bioinform/support.rb +46 -14
  40. data/lib/bioinform/support/strip_doc.rb +1 -1
  41. data/lib/bioinform/version.rb +1 -1
  42. data/spec/alphabet_spec.rb +79 -0
  43. data/spec/background_spec.rb +57 -0
  44. data/spec/cli/cli_spec.rb +6 -6
  45. data/spec/cli/convert_motif_spec.rb +88 -88
  46. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +9 -9
  47. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +11 -11
  48. data/spec/cli/pcm2pwm_spec.rb +22 -23
  49. data/spec/cli/shared_examples/convert_motif/motif_list_empty.rb +1 -1
  50. data/spec/cli/shared_examples/convert_motif/several_motifs_specified.rb +1 -1
  51. data/spec/cli/shared_examples/convert_motif/single_motif_specified.rb +5 -5
  52. data/spec/cli/shared_examples/convert_motif/yield_help_string.rb +2 -2
  53. data/spec/cli/shared_examples/convert_motif/yield_motif_conversion_error.rb +3 -3
  54. data/spec/cli/split_motifs_spec.rb +6 -21
  55. data/spec/converters/pcm2ppm_converter_spec.rb +32 -0
  56. data/spec/converters/pcm2pwm_converter_spec.rb +71 -0
  57. data/spec/converters/ppm2pcm_converter_spec.rb +32 -0
  58. data/spec/converters/pwm2iupac_pwm_converter_spec.rb +65 -0
  59. data/spec/converters/pwm2pcm_converter_spec.rb +57 -0
  60. data/spec/data_models/named_model_spec.rb +41 -0
  61. data/spec/data_models/pcm_spec.rb +114 -45
  62. data/spec/data_models/pm_spec.rb +132 -333
  63. data/spec/data_models/ppm_spec.rb +47 -44
  64. data/spec/data_models/pwm_spec.rb +85 -77
  65. data/spec/fabricators/motif_formats_fabricator.rb +116 -116
  66. data/spec/formatters/consensus_formatter_spec.rb +26 -0
  67. data/spec/formatters/raw_formatter_spec.rb +169 -0
  68. data/spec/parsers/matrix_parser_spec.rb +216 -0
  69. data/spec/parsers/motif_splitter_spec.rb +87 -0
  70. data/spec/spec_helper.rb +2 -2
  71. data/spec/spec_helper_source.rb +25 -5
  72. data/spec/support_spec.rb +31 -0
  73. metadata +43 -124
  74. data/bin/merge_into_collection +0 -4
  75. data/lib/bioinform/cli/merge_into_collection.rb +0 -80
  76. data/lib/bioinform/conversion_algorithms/ppm2pwm_converter.rb +0 -0
  77. data/lib/bioinform/data_models/collection.rb +0 -75
  78. data/lib/bioinform/data_models/motif.rb +0 -56
  79. data/lib/bioinform/formatters/raw_formatter.rb +0 -41
  80. data/lib/bioinform/parsers/jaspar_parser.rb +0 -35
  81. data/lib/bioinform/parsers/parser.rb +0 -92
  82. data/lib/bioinform/parsers/splittable_parser.rb +0 -57
  83. data/lib/bioinform/parsers/string_fantom_parser.rb +0 -35
  84. data/lib/bioinform/parsers/string_parser.rb +0 -72
  85. data/lib/bioinform/parsers/trivial_parser.rb +0 -34
  86. data/lib/bioinform/parsers/yaml_parser.rb +0 -35
  87. data/lib/bioinform/support/advanced_scan.rb +0 -8
  88. data/lib/bioinform/support/array_product.rb +0 -6
  89. data/lib/bioinform/support/array_zip.rb +0 -6
  90. data/lib/bioinform/support/collect_hash.rb +0 -7
  91. data/lib/bioinform/support/deep_dup.rb +0 -5
  92. data/lib/bioinform/support/delete_many.rb +0 -14
  93. data/lib/bioinform/support/inverf.rb +0 -13
  94. data/lib/bioinform/support/multiline_squish.rb +0 -6
  95. data/lib/bioinform/support/parameters.rb +0 -28
  96. data/lib/bioinform/support/partial_sums.rb +0 -16
  97. data/lib/bioinform/support/same_by.rb +0 -12
  98. data/lib/bioinform/support/third_part/active_support/core_ext/array/extract_options.rb +0 -29
  99. data/lib/bioinform/support/third_part/active_support/core_ext/hash/indifferent_access.rb +0 -23
  100. data/lib/bioinform/support/third_part/active_support/core_ext/hash/keys.rb +0 -54
  101. data/lib/bioinform/support/third_part/active_support/core_ext/module/attribute_accessors.rb +0 -64
  102. data/lib/bioinform/support/third_part/active_support/core_ext/object/try.rb +0 -57
  103. data/lib/bioinform/support/third_part/active_support/core_ext/string/access.rb +0 -99
  104. data/lib/bioinform/support/third_part/active_support/core_ext/string/behavior.rb +0 -6
  105. data/lib/bioinform/support/third_part/active_support/core_ext/string/filters.rb +0 -49
  106. data/lib/bioinform/support/third_part/active_support/core_ext/string/multibyte.rb +0 -72
  107. data/lib/bioinform/support/third_part/active_support/hash_with_indifferent_access.rb +0 -181
  108. data/lib/bioinform/support/third_part/active_support/multibyte.rb +0 -44
  109. data/lib/bioinform/support/third_part/active_support/multibyte/chars.rb +0 -476
  110. data/lib/bioinform/support/third_part/active_support/multibyte/exceptions.rb +0 -8
  111. data/lib/bioinform/support/third_part/active_support/multibyte/unicode.rb +0 -393
  112. data/lib/bioinform/support/third_part/active_support/multibyte/utils.rb +0 -60
  113. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +0 -14
  114. data/spec/cli/data/merge_into_collection/KLF4_f2.pwm +0 -11
  115. data/spec/cli/data/merge_into_collection/SP1_f1.pwm +0 -12
  116. data/spec/cli/data/merge_into_collection/collection.txt.result +0 -40
  117. data/spec/cli/data/merge_into_collection/collection.yaml.result +0 -188
  118. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +0 -188
  119. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +0 -14
  120. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +0 -11
  121. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +0 -12
  122. data/spec/cli/data/split_motifs/collection.yaml +0 -188
  123. data/spec/cli/merge_into_collection_spec.rb +0 -100
  124. data/spec/data_models/collection_spec.rb +0 -98
  125. data/spec/data_models/motif_spec.rb +0 -224
  126. data/spec/fabricators/collection_fabricator.rb +0 -8
  127. data/spec/fabricators/motif_fabricator.rb +0 -33
  128. data/spec/fabricators/pcm_fabricator.rb +0 -25
  129. data/spec/fabricators/pm_fabricator.rb +0 -52
  130. data/spec/fabricators/ppm_fabricator.rb +0 -14
  131. data/spec/fabricators/pwm_fabricator.rb +0 -16
  132. data/spec/parsers/parser_spec.rb +0 -152
  133. data/spec/parsers/string_fantom_parser_spec.rb +0 -70
  134. data/spec/parsers/string_parser_spec.rb +0 -77
  135. data/spec/parsers/trivial_parser_spec.rb +0 -64
  136. data/spec/parsers/yaml_parser_spec.rb +0 -50
  137. data/spec/support/advanced_scan_spec.rb +0 -32
  138. data/spec/support/array_product_spec.rb +0 -15
  139. data/spec/support/array_zip_spec.rb +0 -15
  140. data/spec/support/collect_hash_spec.rb +0 -15
  141. data/spec/support/delete_many_spec.rb +0 -44
  142. data/spec/support/inverf_spec.rb +0 -19
  143. data/spec/support/multiline_squish_spec.rb +0 -25
  144. data/spec/support/partial_sums_spec.rb +0 -30
  145. data/spec/support/same_by_spec.rb +0 -36
@@ -1,8 +0,0 @@
1
- Fabricator(:three_elements_collection, class_name: Bioinform::Collection, aliases: [:pm_collection]) do
2
- name 'PM_collection'
3
- after_build{|collection| collection << Fabricate(:pm_1) << Fabricate(:pm_2) << Fabricate(:pm_3) }
4
- end
5
-
6
- Fabricator(:unnamed_pm_collection, from: :pm_collection) do
7
- name nil
8
- end
@@ -1,33 +0,0 @@
1
- Fabricator(:motif, class_name: Bioinform::Motif) do
2
- end
3
-
4
- Fabricator(:motif_with_name, from: :motif) do
5
- name 'Motif name'
6
- end
7
-
8
- Fabricator(:motif_pcm, from: :motif) do
9
- pcm(fabricator: :pcm)
10
- end
11
-
12
- Fabricator(:motif_pwm, from: :motif) do
13
- pwm(fabricator: :pwm)
14
- end
15
-
16
- Fabricator(:motif_ppm, from: :motif) do
17
- ppm(fabricator: :ppm)
18
- end
19
-
20
- Fabricator(:motif_pcm_and_ppm, from: :motif) do
21
- pcm(fabricator: :pcm)
22
- ppm(fabricator: :ppm)
23
- end
24
-
25
- Fabricator(:motif_pwm_and_ppm, from: :motif) do
26
- pwm(fabricator: :pwm)
27
- ppm(fabricator: :ppm)
28
- end
29
-
30
- Fabricator(:motif_pcm_and_pwm, from: :motif) do
31
- pcm(fabricator: :pcm)
32
- pwm(fabricator: :pwm)
33
- end
@@ -1,25 +0,0 @@
1
- Fabricator(:pcm, class_name: Bioinform::PCM) do
2
- initialize_with{ Bioinform::PCM.new(matrix: [[1, 2, 3, 1],[4, 0, 1, 2]], name: 'PCM_name') }
3
- end
4
-
5
- Fabricator(:pcm_with_floats, from: :pcm) do
6
- matrix [[1, 2.3, 3.2, 1],[4.4, 0.1, 0.9, 2.1]]
7
- end
8
-
9
- Fabricator(:completely_different_pcm, from: :pcm) do
10
- matrix [[101,207,138,248],[85,541,7,61]]
11
- name 'PCM_another_name'
12
- end
13
-
14
- Fabricator(:pcm_1, from: :pcm) do
15
- matrix [[7,10,2,3],[4,5,6,7]]
16
- name 'motif_1'
17
- end
18
- Fabricator(:pcm_2, from: :pcm) do
19
- matrix [[5,7,4,6],[11,6,2,3],[10,3,3,6]]
20
- name 'motif_2'
21
- end
22
- Fabricator(:pcm_3, from: :pcm) do
23
- matrix [[3,4,1,14],[9,2,9,2]]
24
- name 'motif_3'
25
- end
@@ -1,52 +0,0 @@
1
- Fabricator(:pm, class_name: Bioinform::PM) do
2
- initialize_with{ Bioinform::PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'PM_name') }
3
- end
4
-
5
- Fabricator(:pm_unnamed, from: :pm) do
6
- name nil
7
- end
8
-
9
-
10
- Fabricator(:pm_first, from: :pm) do
11
- name 'PM_first'
12
- end
13
-
14
- Fabricator(:pm_second, from: :pm) do
15
- matrix [[15,16,17,18],[11,21,31,41]]
16
- name 'PM_second'
17
- end
18
-
19
-
20
- Fabricator(:pm_4x4, from: :pm) do
21
- matrix [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]]
22
- end
23
-
24
- Fabricator(:pm_4x4_unnamed, from: :pm_4x4) do
25
- name nil
26
- end
27
-
28
- Fabricator(:pm_with_floats, from: :pm_unnamed) do
29
- matrix [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]
30
- end
31
-
32
- Fabricator(:pm_1, from: :pm) do
33
- matrix [[0,1,2,3],[4,5,6,7]]
34
- name 'motif_1'
35
- end
36
- Fabricator(:pm_2, from: :pm) do
37
- matrix [[1,2,3,4],[5,6,7,8],[9,10,11,12]]
38
- name 'motif_2'
39
- end
40
- Fabricator(:pm_3, from: :pm) do
41
- matrix [[2,3,4,5],[6,7,8,9]]
42
- name 'motif_3'
43
- end
44
-
45
- Fabricator(:pm_4,from: :pm) do
46
- matrix [[1,0,1,0],[0,0,0,0],[1,2,3,4]]
47
- name 'pm 4'
48
- end
49
- Fabricator(:pm_5, from: :pm) do
50
- matrix [[1,2,1,2],[0,3,6,9],[1,2,3,4]]
51
- name 'pm 5'
52
- end
@@ -1,14 +0,0 @@
1
- Fabricator(:ppm, class_name: Bioinform::PPM) do
2
- initialize_with{ Bioinform::PPM.new(matrix: [[0.2, 0.3, 0.3, 0.2],[0.7, 0.2, 0.0, 0.1]]) }
3
- name 'PPM_name'
4
- end
5
-
6
- # It has the same name as original pcm because PCM#to_ppm doesn't change the name
7
- Fabricator(:ppm_by_pcm, class_name: Bioinform::PPM) do
8
- initialize_with{ Fabricate(:pcm).to_ppm }
9
- end
10
-
11
- Fabricator(:ppm_pcm_divided_by_count, from: :ppm) do
12
- # this matrix should be initialized manually - it's used for spec checking PCM#to_ppm
13
- matrix [[1.0/7.0, 2.0/7.0, 3.0/7.0, 1.0/7.0], [4.0/7.0, 0.0/7.0, 1.0/7.0, 2.0/7.0]]
14
- end
@@ -1,16 +0,0 @@
1
- Fabricator(:pwm, class_name: Bioinform::PWM) do
2
- initialize_with{ Bioinform::PWM.new(matrix: [[1,2,3,4],[5,6,7,8]], name: 'PWM_name')}
3
- end
4
-
5
- # It has name 'PCM_name' because name isn't converted during #to_pwm
6
- Fabricator(:pwm_by_pcm, class_name: Bioinform::PWM) do
7
- initialize_with{ Fabricate(:pcm).to_pwm }
8
- end
9
-
10
- Fabricator(:rounded_upto_3_digits_pwm_by_pcm_with_pseudocount_1, from: :pwm) do
11
- matrix [[-0.47, 0.118, 0.486, -0.47],[0.754, -2.079, -0.47, 0.118]]
12
- end
13
-
14
- Fabricator(:rounded_upto_3_digits_pwm_by_pcm_with_pseudocount_10, from: :pwm) do
15
- matrix [[-0.194, 0.057, 0.258, -0.194],[0.425, -0.531, -0.194, 0.057]]
16
- end
@@ -1,152 +0,0 @@
1
- require_relative '../spec_helper'
2
- require_relative '../../lib/bioinform/parsers/parser'
3
-
4
- module Bioinform
5
- describe Parser do
6
- context '#initialize' do
7
- it 'should accept an array correctly' do
8
- Parser.new([[1,2,3,4],[5,6,7,8]]).parse.matrix.should == [[1,2,3,4],[5,6,7,8]]
9
- end
10
- it 'should treat several arguments as an array composed of them' do
11
- Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
12
- end
13
- it 'should treat one Array of numbers as an Array(with 1 element) of Arrays' do
14
- Parser.new([1,2,3,4]).parse.should == Parser.new([[1,2,3,4]]).parse
15
- end
16
- end
17
-
18
- context '::parse!' do
19
- it 'should behave like Parser.new(input).parse!' do
20
- Parser.parse!([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse!
21
- expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
22
- end
23
- end
24
-
25
- context '::parse' do
26
- it 'should behave like Parser.new(input).parse!' do
27
- Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
28
- Parser.parse([1,2,3],[4,5,6]).should be_nil
29
- end
30
- end
31
-
32
- context '::choose' do
33
- it 'should create parser of appropriate type' do
34
- Parser.choose([[1,2,3,4],[5,6,7,8]]).should be_kind_of(Parser)
35
- Parser.choose([[1,2,3,4],[5,6,7,8]]).input.should == [[1,2,3,4],[5,6,7,8]]
36
- Parser.choose(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name').should be_kind_of(TrivialParser)
37
- Parser.choose(matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name').input.should == {matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}
38
- Parser.choose("1 2 3 4\n5 6 7 8").should be_kind_of(StringParser)
39
- Parser.choose("1 2 3 4\n5 6 7 8").input.should == "1 2 3 4\n5 6 7 8"
40
- end
41
- end
42
-
43
- context '::split_on_motifs' do
44
- it 'should be able to get a single PM' do
45
- Parser.split_on_motifs([[1,2,3,4],[5,6,7,8]], PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:nil) ]
46
- end
47
- end
48
-
49
- context '::normalize_hash_keys' do
50
- it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
51
- Parser.normalize_hash_keys( {a: 1, C: 2, 'g' => 3, 'T' => 4} ).should == {A: 1, C: 2, G: 3, T: 4}
52
- end
53
- end
54
-
55
- context '::need_transpose?' do
56
- it 'should point whether matrix have positions(need not be transposed -- false) or letters(true) as first index' do
57
- Parser.need_tranpose?([[1,3,5,7], [2,4,6,8]]).should be_false
58
- Parser.need_tranpose?([[1,2],[3,4],[5,6],[7,8]]).should be_true
59
- end
60
- end
61
- context '::array_from_acgt_hash' do
62
- it 'should convert hash of arrays to a transposed array of arrays' do
63
- input = {A: [1,2,3], C: [2,3,4], G: [3,4,5], T: [4,5,6]}
64
- Parser.array_from_acgt_hash(input).should == [[1,2,3], [2,3,4], [3,4,5], [4,5,6]].transpose
65
- end
66
- it 'should convert hash of numbers to an array of numbers' do
67
- input = {A: 1, C: 2, G: 3, T: 4}
68
- Parser.array_from_acgt_hash(input).should == [1,2,3,4]
69
- end
70
- it 'should process both symbolic and string keys, in both upcase and downcase' do
71
- input_normal_keys = {A: 1, C: 2, G: 3, T: 4}
72
- input_different_keys = {:A => 1, :c => 2, 'g' => 3, 'T' => 4}
73
- Parser.array_from_acgt_hash(input_different_keys).should == Parser.array_from_acgt_hash(input_normal_keys)
74
- end
75
- end
76
-
77
- context '::try_convert_to_array' do
78
- it 'should not change array' do
79
- inputs = []
80
- inputs << [[1,2,3,4], [2,3,4,5], [3,4,5,6]]
81
- inputs << [{A:1, C:2, G:3, T:4}, {A:2, C:3, G:4, T:5}, {A:3, C:4, G:5, T:6}]
82
- inputs.each do |input|
83
- Parser.try_convert_to_array( input ).should == input
84
- end
85
- end
86
- it 'should convert ACGT-Hashes to an array of positions (not letters)' do
87
- Parser.try_convert_to_array( {:A => [1,2,3], :c => [2,3,4], 'g' => [3,4,5], 'T' => [4,5,6]} ).should == [[1,2,3],[2,3,4],[3,4,5],[4,5,6]].transpose
88
- end
89
- end
90
-
91
- context '#parse' do
92
- it 'should give the same result as #parse!' do
93
- parser = Parser.new('stub parser')
94
- parser.stub(:parse!).and_return('stub result')
95
- parser.parse.should == 'stub result'
96
- end
97
- it 'should return nil if #parse! raised an exception' do
98
- parser = Parser.new('stub parser')
99
- parser.stub(:parse!).and_raise
100
- parser.parse.should be_nil
101
- end
102
- end
103
-
104
- good_cases = {
105
- 'Array Nx4' => {input: [[1,2,3,4],[5,6,7,8]],
106
- result: Fabricate(:pm_unnamed) },
107
-
108
- 'Array 4xN' => {input: [[1,5],[2,6],[3,7],[4,8]],
109
- result: Fabricate(:pm_unnamed) },
110
-
111
- 'Hash A,C,G,T => Arrays' => { input: {:A => [1,5], :c => [2,6],'g' => [3,7],'T' => [4,8]},
112
- result: Fabricate(:pm_unnamed) },
113
-
114
- 'Hash array of hashes' => { input: [{:A => 1,:c => 2,'g' => 3,'T' => 4}, {:A => 5,:c => 6,'g' => 7,'T' => 8}],
115
- result: Fabricate(:pm_unnamed) },
116
-
117
- 'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],
118
- result: Fabricate(:pm_4x4_unnamed) },
119
-
120
- 'Hash A,C,G,T => 4-Arrays' => { input: {:A => [1,5,9,13], :c => [2,6,10,14],'g' => [3,7,11,15],'T' => [4,8,12,16]},
121
- result: Fabricate(:pm_4x4_unnamed) },
122
-
123
- '4-Arrays of A,C,G,T hashes' => { input: [{:A => 1, :c => 2, 'g' => 3, 'T' => 4},
124
- {:A => 5, :c => 6, 'g' => 7, 'T' => 8},
125
- {:A => 9, :c => 10, 'g' => 11, 'T' => 12},
126
- {:A => 13, :c => 14, 'g' => 15, 'T' => 16}],
127
- result: Fabricate(:pm_4x4_unnamed) }
128
- }
129
-
130
- bad_cases = {
131
- 'Nil object on input' => {input: nil},
132
- 'Empty array on input' => {input: []},
133
- 'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
134
- 'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
135
- 'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
136
- 'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12}] },
137
- 'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
138
- 'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
139
- 'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
140
- 'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
141
- 'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
142
- 'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
143
- }
144
-
145
- parser_specs(Parser, good_cases, bad_cases)
146
- context '#parser!' do
147
- it "should raise an exception on parsing empty list to parser" do
148
- expect{ Parser.new().parse! }.to raise_error
149
- end
150
- end
151
- end
152
- end
@@ -1,70 +0,0 @@
1
- require_relative '../spec_helper'
2
- require_relative '../../lib/bioinform/parsers/string_fantom_parser'
3
-
4
- module Bioinform
5
- describe StringFantomParser do
6
- describe '#split_on_motifs' do
7
- it 'should be able to parse several motifs' do
8
- input = "
9
- //
10
- NA motif_1
11
- P0 A C G T
12
- P1 0 1 2 3
13
- P2 4 5 6 7
14
- //
15
- //
16
- NA motif_2
17
- P0 A C G T
18
- P1 1 2 3 4
19
- P2 5 6 7 8
20
- P3 9 10 11 12
21
- //
22
- NA motif_3
23
- P0 A C G T
24
- P1 2 3 4 5
25
- P2 6 7 8 9"
26
- StringFantomParser.split_on_motifs(input).should == [ Fabricate(:pm_1), Fabricate(:pm_2), Fabricate(:pm_3) ]
27
- end
28
- end
29
-
30
- good_cases = {
31
- 'string in Fantom-format' => {input: "
32
- NA PM_name
33
- P0 A C G T
34
- P1 1 2 3 4
35
- P2 5 6 7 8",
36
- result: Fabricate(:pm)
37
- },
38
-
39
- 'motif with additional rows' => {input: "
40
- NA PM_name
41
- P0 A C G T S P
42
- P1 1 2 3 4 5 10
43
- P2 5 6 7 8 5 11",
44
- result: Fabricate(:pm)
45
- },
46
-
47
- 'string with more than 10 positions(2-digit row numbers)' => {input: "
48
- NA PM_name
49
- P0 A C G T
50
- P1 1 2 3 4
51
- P2 5 6 7 8
52
- P3 1 2 3 4
53
- P4 5 6 7 8
54
- P5 1 2 3 4
55
- P6 5 6 7 8
56
- P7 1 2 3 4
57
- P8 5 6 7 8
58
- P9 1 2 3 4
59
- P10 5 6 7 8
60
- P11 1 2 3 4
61
- P12 5 6 7 8",
62
- result: Fabricate(:pm, matrix: [[1,2,3,4],[5,6,7,8]] * 6 )
63
- }
64
- }
65
-
66
- bad_cases = { }
67
-
68
- parser_specs(StringFantomParser, good_cases, bad_cases)
69
- end
70
- end
@@ -1,77 +0,0 @@
1
- require_relative '../spec_helper'
2
- require_relative '../../lib/bioinform/parsers/string_parser'
3
-
4
- module Bioinform
5
- describe StringParser do
6
-
7
- describe '#each' do
8
- it 'should yield consequent results of #parse! while it returns result' do
9
- parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
10
- expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
11
- OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
12
- OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
13
- end
14
- it 'should restart parser from the beginning each time' do
15
- parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
16
- 3.times do
17
- expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
18
- OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
19
- OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
20
- end
21
- end
22
- end
23
-
24
- context '::split_on_motifs' do
25
- it 'should be able to get a single PM' do
26
- StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8").should == [ Fabricate(:pm_unnamed) ]
27
- end
28
- it 'should be able to split several PMs separated with an empty line' do
29
- StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n\n 15 16 17 18 \n 11 21 31 41").should ==
30
- [ Fabricate(:pm_first, name: nil), Fabricate(:pm_second, name: nil) ]
31
- end
32
- it 'should be able to split several PMs separated with name' do
33
- StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \nPM_second\n 15 16 17 18 \n 11 21 31 41").should ==
34
- [ Fabricate(:pm_first, name: nil), Fabricate(:pm_second) ]
35
- end
36
- it 'should be able to split several PMs separated with both name and empty line' do
37
- StringParser.split_on_motifs("PM_first\n1 2 3 4 \n 5 6 7 8 \n\nPM_second\n 15 16 17 18 \n 11 21 31 41\n\n\n").should ==
38
- [ Fabricate(:pm_first), Fabricate(:pm_second) ]
39
- end
40
- it 'should create PMs by default' do
41
- result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
42
- result.each{|pm| pm.class.should == PM}
43
- end
44
- it 'should create PM subclass when it\'s specified' do
45
- result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8", PWM)
46
- result.each{|pm| pm.class.should == PWM}
47
- end
48
- end
49
-
50
- good_cases = {
51
- 'Nx4 string' => {input: "1 2 3 4\n5 6 7 8", result: Fabricate(:pm_unnamed) },
52
- '4xN string' => {input: "1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm_unnamed) },
53
- 'string with name' => {input: "PM_name\n1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm) },
54
- 'string with name (with introduction sign)' => {input: ">\t PM_name\n1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm) },
55
- 'string with name (with special characters)' => {input: "Testmatrix_first:subname+sub-subname\n1 5\n2 6\n3 7\n 4 8",
56
- result: Fabricate(:pm, name: 'Testmatrix_first:subname+sub-subname') },
57
- 'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210", result: Fabricate(:pm_with_floats) },
58
- 'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3", result: Fabricate(:pm_with_floats) },
59
- 'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8", result: Fabricate(:pm_unnamed) },
60
- 'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n", result: Fabricate(:pm_unnamed) },
61
- 'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8", result: Fabricate(:pm_unnamed) },
62
- 'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8", result: Fabricate(:pm_unnamed) },
63
- 'Nx4 string with name and acgt-header' => {input: "PM_name\nA C G T\n1 2 3 4\n5 6 7 8", result: Fabricate(:pm)},
64
- 'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8", result: Fabricate(:pm_unnamed) },
65
- '4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n9 10 11 12\n13 14 15 16", result: Fabricate(:pm_4x4_unnamed) },
66
- '4x4 string with acgt-row-markers' => {input: "A|1 5 9 13\nC|2 6 10 14\nG|3 7 11 15\nT|4 8 12 16", result: Fabricate(:pm_4x4_unnamed) },
67
- '4x4 string with name and acgt-row-markers' => {input: "PM_name\nA:1 5 9 13\nC:2 6 10 14\nG:3 7 11 15\nT:4 8 12 16", result: Fabricate(:pm_4x4) }
68
- }
69
-
70
- bad_cases = {
71
- 'string with non-numeric input' => {input: "1.23 4.56 78aaa 9.0\n9 -8.7 6.54 -3210" },
72
- 'string with empty exponent sign' => {input: "1.23 4.56 7.8 9.0\n 9e -8.7 6.54 3210" }
73
- }
74
-
75
- parser_specs(StringParser, good_cases, bad_cases)
76
- end
77
- end