bioinform 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. data/TODO.txt +7 -2
  2. data/bin/merge_into_collection +4 -0
  3. data/bin/pcm2pwm +1 -1
  4. data/bin/split_motifs +1 -1
  5. data/bioinform.gemspec +2 -0
  6. data/lib/bioinform/cli/merge_into_collection.rb +76 -0
  7. data/lib/bioinform/cli/pcm2pwm.rb +20 -20
  8. data/lib/bioinform/cli/split_motifs.rb +21 -20
  9. data/lib/bioinform/cli.rb +16 -2
  10. data/lib/bioinform/data_models/collection.rb +13 -10
  11. data/lib/bioinform/data_models/pcm.rb +2 -2
  12. data/lib/bioinform/data_models/pm.rb +24 -37
  13. data/lib/bioinform/data_models/ppm.rb +2 -2
  14. data/lib/bioinform/data_models/pwm.rb +2 -2
  15. data/lib/bioinform/data_models.rb +8 -8
  16. data/lib/bioinform/parsers/parser.rb +10 -5
  17. data/lib/bioinform/parsers/splittable_parser.rb +57 -0
  18. data/lib/bioinform/parsers/string_fantom_parser.rb +3 -3
  19. data/lib/bioinform/parsers/string_parser.rb +5 -24
  20. data/lib/bioinform/parsers/trivial_parser.rb +19 -3
  21. data/lib/bioinform/parsers/yaml_parser.rb +35 -0
  22. data/lib/bioinform/parsers.rb +6 -4
  23. data/lib/bioinform/support/parameters.rb +19 -0
  24. data/lib/bioinform/support/partial_sums.rb +1 -1
  25. data/lib/bioinform/support.rb +11 -10
  26. data/lib/bioinform/version.rb +1 -1
  27. data/lib/bioinform.rb +5 -5
  28. data/spec/cli/cli_spec.rb +8 -7
  29. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -0
  30. data/spec/cli/data/{KLF4_f2.pwm.result → merge_into_collection/KLF4_f2.pwm} +0 -0
  31. data/spec/cli/data/{SP1_f1.pwm.result → merge_into_collection/SP1_f1.pwm} +0 -0
  32. data/spec/cli/data/merge_into_collection/collection.txt.result +40 -0
  33. data/spec/cli/data/merge_into_collection/collection.yaml.result +185 -0
  34. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +185 -0
  35. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -0
  36. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -0
  37. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -0
  38. data/spec/cli/data/{KLF4 f2 spaced name.pcm → pcm2pwm/KLF4 f2 spaced name.pcm} +0 -0
  39. data/spec/cli/data/{KLF4_f2.pcm → pcm2pwm/KLF4_f2.pcm} +0 -0
  40. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -0
  41. data/spec/cli/data/{SP1_f1.pcm → pcm2pwm/SP1_f1.pcm} +0 -0
  42. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -0
  43. data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -0
  44. data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -0
  45. data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -0
  46. data/spec/cli/data/split_motifs/collection.yaml +197 -0
  47. data/spec/cli/data/split_motifs/plain_collection.txt +38 -0
  48. data/spec/cli/merge_into_collection_spec.rb +100 -0
  49. data/spec/cli/pcm2pwm_spec.rb +3 -3
  50. data/spec/cli/split_motifs_spec.rb +74 -3
  51. data/spec/data_models/collection_spec.rb +2 -2
  52. data/spec/data_models/pcm_spec.rb +2 -2
  53. data/spec/data_models/pm_spec.rb +10 -27
  54. data/spec/data_models/ppm_spec.rb +2 -2
  55. data/spec/data_models/pwm_spec.rb +3 -3
  56. data/spec/fabricators/collection_fabricator.rb +8 -0
  57. data/spec/fabricators/pm_fabricator.rb +43 -0
  58. data/spec/parsers/parser_spec.rb +29 -37
  59. data/spec/parsers/string_fantom_parser_spec.rb +38 -35
  60. data/spec/parsers/string_parser_spec.rb +33 -66
  61. data/spec/parsers/trivial_parser_spec.rb +48 -6
  62. data/spec/parsers/yaml_parser_spec.rb +50 -0
  63. data/spec/spec_helper.rb +2 -6
  64. data/spec/support/advanced_scan_spec.rb +2 -2
  65. data/spec/support/array_product_spec.rb +2 -2
  66. data/spec/support/array_zip_spec.rb +2 -2
  67. data/spec/support/collect_hash_spec.rb +2 -2
  68. data/spec/support/delete_many_spec.rb +2 -2
  69. data/spec/support/inverf_spec.rb +2 -2
  70. data/spec/support/multiline_squish_spec.rb +2 -2
  71. data/spec/support/partial_sums_spec.rb +2 -2
  72. data/spec/support/same_by_spec.rb +2 -2
  73. metadata +86 -12
@@ -1,11 +1,11 @@
1
- require 'spec_helper'
2
- require 'bioinform/parsers/parser'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/parsers/parser'
3
3
 
4
4
  module Bioinform
5
5
  describe Parser do
6
6
  context '#initialize' do
7
7
  it 'should accept an array correctly' do
8
- Parser.new([[1,2,3,4],[5,6,7,8]]).parse[:matrix].should == [[1,2,3,4],[5,6,7,8]]
8
+ Parser.new([[1,2,3,4],[5,6,7,8]]).parse.matrix.should == [[1,2,3,4],[5,6,7,8]]
9
9
  end
10
10
  it 'should treat several arguments as an array composed of them' do
11
11
  Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
@@ -21,6 +21,7 @@ module Bioinform
21
21
  expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
22
22
  end
23
23
  end
24
+
24
25
  context '::parse' do
25
26
  it 'should behave like Parser.new(input).parse!' do
26
27
  Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
@@ -37,9 +38,13 @@ module Bioinform
37
38
  Parser.choose("1 2 3 4\n5 6 7 8").should be_kind_of(StringParser)
38
39
  Parser.choose("1 2 3 4\n5 6 7 8").input.should == "1 2 3 4\n5 6 7 8"
39
40
  end
40
-
41
41
  end
42
42
 
43
+ context '::split_on_motifs' do
44
+ it 'should be able to get a single PM' do
45
+ Parser.split_on_motifs([[1,2,3,4],[5,6,7,8]], PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:nil) ]
46
+ end
47
+ end
43
48
 
44
49
  context '::normalize_hash_keys' do
45
50
  it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
@@ -97,56 +102,43 @@ module Bioinform
97
102
  end
98
103
 
99
104
  good_cases = {
100
- 'Array Nx4' => {input: [[0,1,2,3],[10,11,12,13]],
101
- matrix: [[0,1,2,3],[10,11,12,13]] },
105
+ 'Array Nx4' => {input: [[1,2,3,4],[5,6,7,8]],
106
+ result: Fabricate(:pm_unnamed) },
102
107
 
103
- 'Array 4xN' => {input: [[0,10],[1,11],[2,12],[3,13]],
104
- matrix: [[0,1,2,3],[10,11,12,13]] },
108
+ 'Array 4xN' => {input: [[1,5],[2,6],[3,7],[4,8]],
109
+ result: Fabricate(:pm_unnamed) },
105
110
 
106
- 'Hash A,C,G,T => Arrays' => { input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13]},
107
- matrix: [[0,1,2,3],[10,11,12,13]] },
111
+ 'Hash A,C,G,T => Arrays' => { input: {:A => [1,5], :c => [2,6],'g' => [3,7],'T' => [4,8]},
112
+ result: Fabricate(:pm_unnamed) },
108
113
 
109
- 'Hash array of hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13}],
110
- matrix: [[0,1,2,3],[10,11,12,13]] },
114
+ 'Hash array of hashes' => { input: [{:A => 1,:c => 2,'g' => 3,'T' => 4}, {:A => 5,:c => 6,'g' => 7,'T' => 8}],
115
+ result: Fabricate(:pm_unnamed) },
111
116
 
112
- 'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[0,1,2,3],[4,5,6,7],[8,9,10,11],[12,13,14,15]],
113
- matrix: [[0,1,2,3],[4,5,6,7],[8,9,10,11],[12,13,14,15]] },
117
+ 'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],
118
+ result: Fabricate(:pm_4x4_unnamed) },
114
119
 
115
- 'Hash A,C,G,T => 4-Arrays' => { input: {:A => [0,10,100,1000], :c => [1,11,101,1001],'g' => [2,12,102,1002],'T' => [3,13,103,1003]},
116
- matrix: [[0,1,2,3],[10,11,12,13],[100,101,102,103],[1000,1001,1002,1003]] },
120
+ 'Hash A,C,G,T => 4-Arrays' => { input: {:A => [1,5,9,13], :c => [2,6,10,14],'g' => [3,7,11,15],'T' => [4,8,12,16]},
121
+ result: Fabricate(:pm_4x4_unnamed) },
117
122
 
118
- '4-Arrays of A,C,G,T hashes' => { input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
119
- {:A => 10, :c => 11, 'g' => 12, 'T' => 13},
120
- {:A => 100, :c => 101, 'g' => 102, 'T' => 103},
121
- {:A => 1000, :c => 1001, 'g' => 1002, 'T' => 1003}],
122
- matrix: [[0,1,2,3],[10,11,12,13],[100,101,102,103],[1000,1001,1002,1003]] }
123
+ '4-Arrays of A,C,G,T hashes' => { input: [{:A => 1, :c => 2, 'g' => 3, 'T' => 4},
124
+ {:A => 5, :c => 6, 'g' => 7, 'T' => 8},
125
+ {:A => 9, :c => 10, 'g' => 11, 'T' => 12},
126
+ {:A => 13, :c => 14, 'g' => 15, 'T' => 16}],
127
+ result: Fabricate(:pm_4x4_unnamed) }
123
128
  }
124
129
 
125
130
  bad_cases = {
126
131
  'Nil object on input' => {input: nil},
127
-
128
132
  'Empty array on input' => {input: []},
129
-
130
133
  'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
131
-
132
134
  'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
133
-
134
135
  'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
135
-
136
- 'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
137
- {:A => 10, :c => 11, 'g' => 12}] },
138
- 'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
139
- {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
140
-
141
- 'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3},
142
- {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
143
-
136
+ 'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12}] },
137
+ 'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
138
+ 'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
144
139
  'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
145
-
146
140
  'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
147
-
148
141
  'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
149
-
150
142
  'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
151
143
  }
152
144
 
@@ -1,16 +1,16 @@
1
- require 'spec_helper'
2
- require 'bioinform/parsers/string_fantom_parser'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/parsers/string_fantom_parser'
3
3
 
4
4
  module Bioinform
5
5
  describe StringFantomParser do
6
- describe '#parse' do
6
+ describe '#split_on_motifs' do
7
7
  it 'should be able to parse several motifs' do
8
- input = <<-EOS
8
+ input = "
9
9
  //
10
10
  NA motif_1
11
11
  P0 A C G T
12
- P1 0 1 2 3
13
- P2 4 5 6 7
12
+ P1 0 1 2 3
13
+ P2 4 5 6 7
14
14
  //
15
15
  //
16
16
  NA motif_2
@@ -22,41 +22,44 @@ P3 9 10 11 12
22
22
  NA motif_3
23
23
  P0 A C G T
24
24
  P1 2 3 4 5
25
- P2 6 7 8 9
26
- EOS
27
- StringFantomParser.split(input).should == [ {matrix: [[0,1,2,3],[4,5,6,7]], name: 'motif_1'},
28
- {matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name: 'motif_2'},
29
- {matrix: [[2,3,4,5],[6,7,8,9]], name: 'motif_3'} ]
30
- end
31
-
32
- it 'should be able to parse motif with additional rows' do
33
- input = <<-EOS
34
- NA motif_1
35
- P0 A C G T S P
36
- P1 0 1 2 3 5 10
37
- P2 4 5 6 7 5 11
38
- EOS
39
- StringFantomParser.split(input).should == [ {matrix: [[0,1,2,3],[4,5,6,7]], name: 'motif_1'} ]
25
+ P2 6 7 8 9"
26
+ StringFantomParser.split_on_motifs(input).should == [ Fabricate(:pm_1), Fabricate(:pm_2), Fabricate(:pm_3) ]
40
27
  end
41
28
  end
42
29
 
43
30
  good_cases = {
44
31
  'string in Fantom-format' => {input: "
45
- NA motif_CTNCAG
32
+ NA PM_name
33
+ P0 A C G T
34
+ P1 1 2 3 4
35
+ P2 5 6 7 8",
36
+ result: Fabricate(:pm)
37
+ },
38
+
39
+ 'motif with additional rows' => {input: "
40
+ NA PM_name
41
+ P0 A C G T S P
42
+ P1 1 2 3 4 5 10
43
+ P2 5 6 7 8 5 11",
44
+ result: Fabricate(:pm)
45
+ },
46
+
47
+ 'string with more than 10 positions(2-digit row numbers)' => {input: "
48
+ NA PM_name
46
49
  P0 A C G T
47
- P1 0 1878368 0 0
48
- P2 0 0 0 1878368
49
- P3 469592 469592 469592 469592
50
- P4 0 1878368 0 0
51
- P5 1878368 0 0 0
52
- P6 0 0 1878368 0",
53
- matrix: [ [0.0, 1878368.0, 0.0, 0.0],
54
- [0.0, 0.0, 0.0, 1878368.0],
55
- [469592.0, 469592.0, 469592.0, 469592.0],
56
- [0.0, 1878368.0, 0.0, 0.0],
57
- [1878368.0, 0.0, 0.0, 0.0],
58
- [0.0, 0.0, 1878368.0, 0.0]],
59
- name: 'motif_CTNCAG'
50
+ P1 1 2 3 4
51
+ P2 5 6 7 8
52
+ P3 1 2 3 4
53
+ P4 5 6 7 8
54
+ P5 1 2 3 4
55
+ P6 5 6 7 8
56
+ P7 1 2 3 4
57
+ P8 5 6 7 8
58
+ P9 1 2 3 4
59
+ P10 5 6 7 8
60
+ P11 1 2 3 4
61
+ P12 5 6 7 8",
62
+ result: Fabricate(:pm, matrix: [[1,2,3,4],[5,6,7,8]] * 6 )
60
63
  }
61
64
  }
62
65
 
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/parsers/string_parser'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/parsers/string_parser'
3
3
 
4
4
  module Bioinform
5
5
  describe StringParser do
@@ -7,37 +7,35 @@ module Bioinform
7
7
  describe '#each' do
8
8
  it 'should yield consequent results of #parse! while it returns result' do
9
9
  parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
10
- expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
10
+ expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
11
+ OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
12
+ OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
11
13
  end
12
14
  it 'should restart parser from the beginning each time' do
13
15
  parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
14
16
  3.times do
15
- expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
17
+ expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
18
+ OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
19
+ OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
16
20
  end
17
21
  end
18
22
  end
19
23
 
20
- context '::split' do
24
+ context '::split_on_motifs' do
21
25
  it 'should be able to get a single PM' do
22
- StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12").should == [ {matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name:nil} ]
26
+ StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8").should == [ Fabricate(:pm_unnamed) ]
23
27
  end
24
-
25
28
  it 'should be able to split several PMs separated with an empty line' do
26
- StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:nil} ]
29
+ StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n\n 15 16 17 18 \n 11 21 31 41").should ==
30
+ [ Fabricate(:pm_first, name: nil), Fabricate(:pm_second, name: nil) ]
27
31
  end
28
-
29
32
  it 'should be able to split several PMs separated with name' do
30
- StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
31
-
32
- StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8\n\n\n").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
33
+ StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \nPM_second\n 15 16 17 18 \n 11 21 31 41").should ==
34
+ [ Fabricate(:pm_first, name: nil), Fabricate(:pm_second) ]
33
35
  end
34
- end
35
-
36
- context '::split_on_motifs' do
37
- it 'should be able to split string into PMs' do
38
- result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
39
- result.map{|pm| pm.matrix}.should == [ [[1,2,3,4],[5,6,7,8],[9,10,11,12]], [[9,10,11,12],[1,2,3,4],[5,6,7,8]] ]
40
- result.map{|pm| pm.name}.should == [nil, 'Name']
36
+ it 'should be able to split several PMs separated with both name and empty line' do
37
+ StringParser.split_on_motifs("PM_first\n1 2 3 4 \n 5 6 7 8 \n\nPM_second\n 15 16 17 18 \n 11 21 31 41\n\n\n").should ==
38
+ [ Fabricate(:pm_first), Fabricate(:pm_second) ]
41
39
  end
42
40
  it 'should create PMs by default' do
43
41
  result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
@@ -50,54 +48,23 @@ module Bioinform
50
48
  end
51
49
 
52
50
  good_cases = {
53
- 'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
54
- matrix: [[1,2,3,4],[5,6,7,8]] },
55
-
56
- '4xN string' => {input: "1 5\n2 6\n3 7\n 4 8",
57
- matrix: [[1,2,3,4],[5,6,7,8]] },
58
-
59
- 'string with name' => {input: "TestMatrix\n1 5\n2 6\n3 7\n 4 8",
60
- matrix: [[1,2,3,4],[5,6,7,8]], name: 'TestMatrix' },
61
-
62
- 'string with name (with introduction sign)' => {input: ">\t TestMatrix\n1 5\n2 6\n3 7\n 4 8",
63
- matrix: [[1,2,3,4],[5,6,7,8]],
64
- name: 'TestMatrix' },
65
-
51
+ 'Nx4 string' => {input: "1 2 3 4\n5 6 7 8", result: Fabricate(:pm_unnamed) },
52
+ '4xN string' => {input: "1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm_unnamed) },
53
+ 'string with name' => {input: "PM_name\n1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm) },
54
+ 'string with name (with introduction sign)' => {input: ">\t PM_name\n1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm) },
66
55
  'string with name (with special characters)' => {input: "Testmatrix_first:subname+sub-subname\n1 5\n2 6\n3 7\n 4 8",
67
- matrix: [[1,2,3,4],[5,6,7,8]], name: 'Testmatrix_first:subname+sub-subname' },
68
-
69
- 'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210",
70
- matrix: [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]},
71
-
72
- 'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3",
73
- matrix: [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]},
74
-
75
- 'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8",
76
- matrix: [[1,2,3,4],[5,6,7,8]] },
77
-
78
- 'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n",
79
- matrix: [[1,2,3,4],[5,6,7,8]] },
80
-
81
- 'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8",
82
- matrix: [[1,2,3,4],[5,6,7,8]] },
83
-
84
- 'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8",
85
- matrix: [[1,2,3,4],[5,6,7,8]] },
86
-
87
- 'Nx4 string with name and acgt-header' => {input: "Name\nA C G T\n1 2 3 4\n5 6 7 8",
88
- matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'},
89
-
90
- 'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8",
91
- matrix: [[1,2,3,4],[5,6,7,8]] },
92
-
93
- '4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n0 0 0 0\n2 2 2 2",
94
- matrix: [[1,2,3,4],[5,6,7,8],[0,0,0,0],[2,2,2,2]] },
95
-
96
- '4x4 string with acgt-row-markers' => {input: "A|1 2 3 4\nC|5 6 7 8\nG|0 0 0 0\nT|2 2 2 2",
97
- matrix: [[1,5,0,2],[2,6,0,2],[3,7,0,2],[4,8,0,2]] },
98
-
99
- '4x4 string with name and acgt-row-markers' => {input: "Name\nA:1 2 3 4\nC:5 6 7 8\nG:0 0 0 0\nT:2 2 2 2",
100
- matrix: [[1,5,0,2],[2,6,0,2],[3,7,0,2],[4,8,0,2]], name: 'Name' }
56
+ result: Fabricate(:pm, name: 'Testmatrix_first:subname+sub-subname') },
57
+ 'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210", result: Fabricate(:pm_with_floats) },
58
+ 'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3", result: Fabricate(:pm_with_floats) },
59
+ 'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8", result: Fabricate(:pm_unnamed) },
60
+ 'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n", result: Fabricate(:pm_unnamed) },
61
+ 'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8", result: Fabricate(:pm_unnamed) },
62
+ 'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8", result: Fabricate(:pm_unnamed) },
63
+ 'Nx4 string with name and acgt-header' => {input: "PM_name\nA C G T\n1 2 3 4\n5 6 7 8", result: Fabricate(:pm)},
64
+ 'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8", result: Fabricate(:pm_unnamed) },
65
+ '4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n9 10 11 12\n13 14 15 16", result: Fabricate(:pm_4x4_unnamed) },
66
+ '4x4 string with acgt-row-markers' => {input: "A|1 5 9 13\nC|2 6 10 14\nG|3 7 11 15\nT|4 8 12 16", result: Fabricate(:pm_4x4_unnamed) },
67
+ '4x4 string with name and acgt-row-markers' => {input: "PM_name\nA:1 5 9 13\nC:2 6 10 14\nG:3 7 11 15\nT:4 8 12 16", result: Fabricate(:pm_4x4) }
101
68
  }
102
69
 
103
70
  bad_cases = {
@@ -1,5 +1,6 @@
1
- require 'spec_helper'
2
- require 'bioinform/parsers/parser'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/parsers/parser'
3
+ require_relative '../../lib/bioinform/data_models/collection'
3
4
 
4
5
  module Bioinform
5
6
  describe TrivialParser do
@@ -8,15 +9,56 @@ module Bioinform
8
9
  TrivialParser.instance_method(:initialize).arity.should == 1
9
10
  end
10
11
  end
11
- context '#parser!' do
12
- it 'should return input of that was passed to initialize' do
13
- TrivialParser.new('stub input').parse!.should == 'stub input'
12
+
13
+ context '#parse!' do
14
+ it 'should return OpenStruct based on input of that was passed to initialize when input is a Hash' do
15
+ TrivialParser.new(matrix: 'stub matrix', name: 'stub name').parse!.should == OpenStruct.new(matrix: 'stub matrix', name: 'stub name')
16
+ end
17
+
18
+ it 'should return OpenStruct based on input of that was passed to initialize when input is a OpenStruct' do
19
+ TrivialParser.new(OpenStruct.new(matrix: 'stub matrix', name: 'stub name')).parse!.should == OpenStruct.new(matrix: 'stub matrix', name: 'stub name')
14
20
  end
15
21
  end
22
+
23
+ context '::split_on_motifs' do
24
+ it 'should be able to get a single PM' do
25
+ TrivialParser.split_on_motifs({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:'Name') ]
26
+ end
27
+ end
28
+
16
29
  it 'can be used to create PM with {matrix: ..., name: ...} form' do
17
30
  pm = PM.new({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
18
31
  pm.matrix.should == [[1,2,3,4],[5,6,7,8]]
19
32
  pm.name.should == 'Name'
20
33
  end
34
+
35
+ it 'can be used to create PM from PM (make copy)' do
36
+ pm = Fabricate(:pm)
37
+ pm_copy = PM.new(pm, TrivialParser)
38
+ pm_copy.should == pm
39
+ end
40
+ end
41
+
42
+ describe TrivialCollectionParser do
43
+ before :each do
44
+ @pm_1 = Fabricate(:pm_first)
45
+ @pm_2 = Fabricate(:pm_second)
46
+ @collection = Fabricate(:two_elements_collection)
47
+ end
48
+
49
+ describe '#parse!' do
50
+ it 'can be used to obtain PMs from Collection' do
51
+ @parser = TrivialCollectionParser.new(@collection)
52
+ @parser.parse!.should == @pm_1
53
+ @parser.parse!.should == @pm_2
54
+ expect{ @parser.parse! }.to raise_error
55
+ end
56
+ end
57
+
58
+ describe '::split_on_motifs' do
59
+ it 'should be able to split collection into PMs' do
60
+ TrivialCollectionParser.split_on_motifs(@collection).should == [@pm_1, @pm_2]
61
+ end
62
+ end
21
63
  end
22
- end
64
+ end
@@ -0,0 +1,50 @@
1
+ require 'yaml'
2
+ require_relative '../spec_helper'
3
+ require_relative '../../lib/bioinform/parsers/yaml_parser'
4
+ require_relative '../../lib/bioinform/data_models/collection'
5
+
6
+ module Bioinform
7
+ describe YAMLParser do
8
+ context '#parse!' do
9
+ it 'should return PM that was encoded in YAML format' do
10
+ pm = Fabricate(:pm)
11
+ parser = YAMLParser.new(pm.to_yaml)
12
+ parser.parse!.should == pm
13
+ end
14
+ end
15
+ it 'can be used to create PM from yaml-string' do
16
+ pm = Fabricate(:pm)
17
+ pm_copy = PM.new(pm.to_yaml, YAMLParser)
18
+ pm_copy.should == pm
19
+ end
20
+
21
+ context '::split_on_motifs' do
22
+ it 'should be able to get a single PM' do
23
+ pm = Fabricate(:pm)
24
+ YAMLParser.split_on_motifs(pm.to_yaml, PM).should == [ pm ]
25
+ end
26
+ end
27
+ end
28
+
29
+ describe YAMLCollectionParser do
30
+ before :each do
31
+ @pm_1 = Fabricate(:pm_first)
32
+ @pm_2 = Fabricate(:pm_second)
33
+ @collection = Collection.new
34
+ @collection << @pm_1 << @pm_2
35
+ end
36
+ context '::split_on_motifs' do
37
+ it 'should be able to split collection into PMs' do
38
+ YAMLCollectionParser.split_on_motifs(@collection.to_yaml).should == [@pm_1, @pm_2]
39
+ end
40
+ end
41
+ context '#parse!' do
42
+ it 'should return PMs which were in encoded YAML format' do
43
+ @parser = YAMLCollectionParser.new(@collection.to_yaml)
44
+ @parser.parse!.should == @pm_1
45
+ @parser.parse!.should == @pm_2
46
+ expect{ @parser.parse! }.to raise_error
47
+ end
48
+ end
49
+ end
50
+ end
data/spec/spec_helper.rb CHANGED
@@ -5,6 +5,7 @@ require 'rspec'
5
5
 
6
6
  require 'fileutils'
7
7
  require 'stringio'
8
+ require 'fabrication'
8
9
 
9
10
  # from minitest
10
11
  def capture_io(&block)
@@ -39,12 +40,7 @@ def parser_specs(parser_klass, good_cases, bad_cases)
39
40
  good_cases.each do |case_description, input_and_result|
40
41
  it "should be able to parse #{case_description}" do
41
42
  result = parser_klass.new(input_and_result[:input]).parse
42
- result[:matrix].should == input_and_result[:matrix]
43
- if input_and_result.has_key?(:name)
44
- result[:name].should == input_and_result[:name]
45
- else
46
- result[:name].should be_nil
47
- end
43
+ Bioinform::PM.new(result).should == input_and_result[:result]
48
44
  end
49
45
  end
50
46
 
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/advanced_scan'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/advanced_scan'
3
3
 
4
4
  describe StringScanner do
5
5
  context '#advanced_scan' do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/array_product'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/array_product'
3
3
 
4
4
  describe Array do
5
5
  context '::product' do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/array_zip'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/array_zip'
3
3
 
4
4
  describe Array do
5
5
  context '::zip' do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/collect_hash'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/collect_hash'
3
3
 
4
4
  describe Enumerable do
5
5
  # %w{A C G T}.collect_hash{|k| [k*2, k*3] }
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/delete_many'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/delete_many'
3
3
 
4
4
  describe Array do
5
5
  before :each do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/inverf'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/inverf'
3
3
 
4
4
  describe 'Math#inverf' do
5
5
  it 'should be erf(inverf(x)) == x' do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/multiline_squish'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/multiline_squish'
3
3
 
4
4
  describe String do
5
5
  describe '#multiline_squish' do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/partial_sums'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/partial_sums'
3
3
 
4
4
  describe 'Array#partial_sums' do
5
5
  context 'when no initial value given' do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/same_by'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/same_by'
3
3
 
4
4
  describe Enumerable do
5
5
  describe '#same_by?' do