bioinform 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. data/TODO.txt +7 -2
  2. data/bin/merge_into_collection +4 -0
  3. data/bin/pcm2pwm +1 -1
  4. data/bin/split_motifs +1 -1
  5. data/bioinform.gemspec +2 -0
  6. data/lib/bioinform/cli/merge_into_collection.rb +76 -0
  7. data/lib/bioinform/cli/pcm2pwm.rb +20 -20
  8. data/lib/bioinform/cli/split_motifs.rb +21 -20
  9. data/lib/bioinform/cli.rb +16 -2
  10. data/lib/bioinform/data_models/collection.rb +13 -10
  11. data/lib/bioinform/data_models/pcm.rb +2 -2
  12. data/lib/bioinform/data_models/pm.rb +24 -37
  13. data/lib/bioinform/data_models/ppm.rb +2 -2
  14. data/lib/bioinform/data_models/pwm.rb +2 -2
  15. data/lib/bioinform/data_models.rb +8 -8
  16. data/lib/bioinform/parsers/parser.rb +10 -5
  17. data/lib/bioinform/parsers/splittable_parser.rb +57 -0
  18. data/lib/bioinform/parsers/string_fantom_parser.rb +3 -3
  19. data/lib/bioinform/parsers/string_parser.rb +5 -24
  20. data/lib/bioinform/parsers/trivial_parser.rb +19 -3
  21. data/lib/bioinform/parsers/yaml_parser.rb +35 -0
  22. data/lib/bioinform/parsers.rb +6 -4
  23. data/lib/bioinform/support/parameters.rb +19 -0
  24. data/lib/bioinform/support/partial_sums.rb +1 -1
  25. data/lib/bioinform/support.rb +11 -10
  26. data/lib/bioinform/version.rb +1 -1
  27. data/lib/bioinform.rb +5 -5
  28. data/spec/cli/cli_spec.rb +8 -7
  29. data/spec/cli/data/merge_into_collection/GABPA_f1.pwm +14 -0
  30. data/spec/cli/data/{KLF4_f2.pwm.result → merge_into_collection/KLF4_f2.pwm} +0 -0
  31. data/spec/cli/data/{SP1_f1.pwm.result → merge_into_collection/SP1_f1.pwm} +0 -0
  32. data/spec/cli/data/merge_into_collection/collection.txt.result +40 -0
  33. data/spec/cli/data/merge_into_collection/collection.yaml.result +185 -0
  34. data/spec/cli/data/merge_into_collection/collection_pwm.yaml.result +185 -0
  35. data/spec/cli/data/merge_into_collection/pwm_folder/GABPA_f1.pwm +14 -0
  36. data/spec/cli/data/merge_into_collection/pwm_folder/KLF4_f2.pwm +11 -0
  37. data/spec/cli/data/merge_into_collection/pwm_folder/SP1_f1.pwm +12 -0
  38. data/spec/cli/data/{KLF4 f2 spaced name.pcm → pcm2pwm/KLF4 f2 spaced name.pcm} +0 -0
  39. data/spec/cli/data/{KLF4_f2.pcm → pcm2pwm/KLF4_f2.pcm} +0 -0
  40. data/spec/cli/data/pcm2pwm/KLF4_f2.pwm.result +11 -0
  41. data/spec/cli/data/{SP1_f1.pcm → pcm2pwm/SP1_f1.pcm} +0 -0
  42. data/spec/cli/data/pcm2pwm/SP1_f1.pwm.result +12 -0
  43. data/spec/cli/data/split_motifs/GABPA_f1.mat.result +14 -0
  44. data/spec/cli/data/split_motifs/KLF4_f2.mat.result +11 -0
  45. data/spec/cli/data/split_motifs/SP1_f1.mat.result +12 -0
  46. data/spec/cli/data/split_motifs/collection.yaml +197 -0
  47. data/spec/cli/data/split_motifs/plain_collection.txt +38 -0
  48. data/spec/cli/merge_into_collection_spec.rb +100 -0
  49. data/spec/cli/pcm2pwm_spec.rb +3 -3
  50. data/spec/cli/split_motifs_spec.rb +74 -3
  51. data/spec/data_models/collection_spec.rb +2 -2
  52. data/spec/data_models/pcm_spec.rb +2 -2
  53. data/spec/data_models/pm_spec.rb +10 -27
  54. data/spec/data_models/ppm_spec.rb +2 -2
  55. data/spec/data_models/pwm_spec.rb +3 -3
  56. data/spec/fabricators/collection_fabricator.rb +8 -0
  57. data/spec/fabricators/pm_fabricator.rb +43 -0
  58. data/spec/parsers/parser_spec.rb +29 -37
  59. data/spec/parsers/string_fantom_parser_spec.rb +38 -35
  60. data/spec/parsers/string_parser_spec.rb +33 -66
  61. data/spec/parsers/trivial_parser_spec.rb +48 -6
  62. data/spec/parsers/yaml_parser_spec.rb +50 -0
  63. data/spec/spec_helper.rb +2 -6
  64. data/spec/support/advanced_scan_spec.rb +2 -2
  65. data/spec/support/array_product_spec.rb +2 -2
  66. data/spec/support/array_zip_spec.rb +2 -2
  67. data/spec/support/collect_hash_spec.rb +2 -2
  68. data/spec/support/delete_many_spec.rb +2 -2
  69. data/spec/support/inverf_spec.rb +2 -2
  70. data/spec/support/multiline_squish_spec.rb +2 -2
  71. data/spec/support/partial_sums_spec.rb +2 -2
  72. data/spec/support/same_by_spec.rb +2 -2
  73. metadata +86 -12
@@ -1,11 +1,11 @@
1
- require 'spec_helper'
2
- require 'bioinform/parsers/parser'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/parsers/parser'
3
3
 
4
4
  module Bioinform
5
5
  describe Parser do
6
6
  context '#initialize' do
7
7
  it 'should accept an array correctly' do
8
- Parser.new([[1,2,3,4],[5,6,7,8]]).parse[:matrix].should == [[1,2,3,4],[5,6,7,8]]
8
+ Parser.new([[1,2,3,4],[5,6,7,8]]).parse.matrix.should == [[1,2,3,4],[5,6,7,8]]
9
9
  end
10
10
  it 'should treat several arguments as an array composed of them' do
11
11
  Parser.new([1,2,3,4],[5,6,7,8]).parse.should == Parser.new([[1,2,3,4],[5,6,7,8]]).parse
@@ -21,6 +21,7 @@ module Bioinform
21
21
  expect{ Parser.parse!([1,2,3],[4,5,6]) }.to raise_error
22
22
  end
23
23
  end
24
+
24
25
  context '::parse' do
25
26
  it 'should behave like Parser.new(input).parse!' do
26
27
  Parser.parse([1,2,3,4],[5,6,7,8]).should == Parser.new([1,2,3,4],[5,6,7,8]).parse
@@ -37,9 +38,13 @@ module Bioinform
37
38
  Parser.choose("1 2 3 4\n5 6 7 8").should be_kind_of(StringParser)
38
39
  Parser.choose("1 2 3 4\n5 6 7 8").input.should == "1 2 3 4\n5 6 7 8"
39
40
  end
40
-
41
41
  end
42
42
 
43
+ context '::split_on_motifs' do
44
+ it 'should be able to get a single PM' do
45
+ Parser.split_on_motifs([[1,2,3,4],[5,6,7,8]], PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:nil) ]
46
+ end
47
+ end
43
48
 
44
49
  context '::normalize_hash_keys' do
45
50
  it 'should convert both symbolic and string keys, in both upcase and downcase to symbolic upcases' do
@@ -97,56 +102,43 @@ module Bioinform
97
102
  end
98
103
 
99
104
  good_cases = {
100
- 'Array Nx4' => {input: [[0,1,2,3],[10,11,12,13]],
101
- matrix: [[0,1,2,3],[10,11,12,13]] },
105
+ 'Array Nx4' => {input: [[1,2,3,4],[5,6,7,8]],
106
+ result: Fabricate(:pm_unnamed) },
102
107
 
103
- 'Array 4xN' => {input: [[0,10],[1,11],[2,12],[3,13]],
104
- matrix: [[0,1,2,3],[10,11,12,13]] },
108
+ 'Array 4xN' => {input: [[1,5],[2,6],[3,7],[4,8]],
109
+ result: Fabricate(:pm_unnamed) },
105
110
 
106
- 'Hash A,C,G,T => Arrays' => { input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13]},
107
- matrix: [[0,1,2,3],[10,11,12,13]] },
111
+ 'Hash A,C,G,T => Arrays' => { input: {:A => [1,5], :c => [2,6],'g' => [3,7],'T' => [4,8]},
112
+ result: Fabricate(:pm_unnamed) },
108
113
 
109
- 'Hash array of hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13}],
110
- matrix: [[0,1,2,3],[10,11,12,13]] },
114
+ 'Hash array of hashes' => { input: [{:A => 1,:c => 2,'g' => 3,'T' => 4}, {:A => 5,:c => 6,'g' => 7,'T' => 8}],
115
+ result: Fabricate(:pm_unnamed) },
111
116
 
112
- 'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[0,1,2,3],[4,5,6,7],[8,9,10,11],[12,13,14,15]],
113
- matrix: [[0,1,2,3],[4,5,6,7],[8,9,10,11],[12,13,14,15]] },
117
+ 'Array 4x4 (rows treated as positions, columns are treated as letter)' => { input: [[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16]],
118
+ result: Fabricate(:pm_4x4_unnamed) },
114
119
 
115
- 'Hash A,C,G,T => 4-Arrays' => { input: {:A => [0,10,100,1000], :c => [1,11,101,1001],'g' => [2,12,102,1002],'T' => [3,13,103,1003]},
116
- matrix: [[0,1,2,3],[10,11,12,13],[100,101,102,103],[1000,1001,1002,1003]] },
120
+ 'Hash A,C,G,T => 4-Arrays' => { input: {:A => [1,5,9,13], :c => [2,6,10,14],'g' => [3,7,11,15],'T' => [4,8,12,16]},
121
+ result: Fabricate(:pm_4x4_unnamed) },
117
122
 
118
- '4-Arrays of A,C,G,T hashes' => { input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
119
- {:A => 10, :c => 11, 'g' => 12, 'T' => 13},
120
- {:A => 100, :c => 101, 'g' => 102, 'T' => 103},
121
- {:A => 1000, :c => 1001, 'g' => 1002, 'T' => 1003}],
122
- matrix: [[0,1,2,3],[10,11,12,13],[100,101,102,103],[1000,1001,1002,1003]] }
123
+ '4-Arrays of A,C,G,T hashes' => { input: [{:A => 1, :c => 2, 'g' => 3, 'T' => 4},
124
+ {:A => 5, :c => 6, 'g' => 7, 'T' => 8},
125
+ {:A => 9, :c => 10, 'g' => 11, 'T' => 12},
126
+ {:A => 13, :c => 14, 'g' => 15, 'T' => 16}],
127
+ result: Fabricate(:pm_4x4_unnamed) }
123
128
  }
124
129
 
125
130
  bad_cases = {
126
131
  'Nil object on input' => {input: nil},
127
-
128
132
  'Empty array on input' => {input: []},
129
-
130
133
  'Different sizes of row arrays' => {input: [[1,2,3,4],[5,6,7,8,9]] },
131
-
132
134
  'Different sizes of column arrays' => {input: [[0,10],[1,11],[2,12],[3]] },
133
-
134
135
  'No one dimension have size 4' => {input: [[0,1,2,3,4],[10,11,12,13,14], [0,1,2,3,4]] },
135
-
136
- 'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
137
- {:A => 10, :c => 11, 'g' => 12}] },
138
- 'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3},
139
- {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
140
-
141
- 'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3},
142
- {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
143
-
136
+ 'Missing keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12}] },
137
+ 'Bad keys in column hashes' => {input: [{:A => 0, :c => 1, 'g' => 2, 'T' => 3}, {:A => 10, :c => 11, 'g' => 12, :X =>1000}] },
138
+ 'Excessing keys in column hashes' => { input: [{:A => 0,:c => 1,'g' => 2,'T' => 3}, {:A => 10,:c => 11,'g' => 12,'T' => 13, :X => 1000}] },
144
139
  'Different sizes of row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12],'T' => [3,13,14]} },
145
-
146
140
  'Missing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12]} },
147
-
148
141
  'Wrong keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'X' => [2,12]} },
149
-
150
142
  'Excessing keys in row hashes' => {input: {:A => [0,10], :c => [1,11],'g' => [2,12], 'T' => [3,12], :X => [4,14]} }
151
143
  }
152
144
 
@@ -1,16 +1,16 @@
1
- require 'spec_helper'
2
- require 'bioinform/parsers/string_fantom_parser'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/parsers/string_fantom_parser'
3
3
 
4
4
  module Bioinform
5
5
  describe StringFantomParser do
6
- describe '#parse' do
6
+ describe '#split_on_motifs' do
7
7
  it 'should be able to parse several motifs' do
8
- input = <<-EOS
8
+ input = "
9
9
  //
10
10
  NA motif_1
11
11
  P0 A C G T
12
- P1 0 1 2 3
13
- P2 4 5 6 7
12
+ P1 0 1 2 3
13
+ P2 4 5 6 7
14
14
  //
15
15
  //
16
16
  NA motif_2
@@ -22,41 +22,44 @@ P3 9 10 11 12
22
22
  NA motif_3
23
23
  P0 A C G T
24
24
  P1 2 3 4 5
25
- P2 6 7 8 9
26
- EOS
27
- StringFantomParser.split(input).should == [ {matrix: [[0,1,2,3],[4,5,6,7]], name: 'motif_1'},
28
- {matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name: 'motif_2'},
29
- {matrix: [[2,3,4,5],[6,7,8,9]], name: 'motif_3'} ]
30
- end
31
-
32
- it 'should be able to parse motif with additional rows' do
33
- input = <<-EOS
34
- NA motif_1
35
- P0 A C G T S P
36
- P1 0 1 2 3 5 10
37
- P2 4 5 6 7 5 11
38
- EOS
39
- StringFantomParser.split(input).should == [ {matrix: [[0,1,2,3],[4,5,6,7]], name: 'motif_1'} ]
25
+ P2 6 7 8 9"
26
+ StringFantomParser.split_on_motifs(input).should == [ Fabricate(:pm_1), Fabricate(:pm_2), Fabricate(:pm_3) ]
40
27
  end
41
28
  end
42
29
 
43
30
  good_cases = {
44
31
  'string in Fantom-format' => {input: "
45
- NA motif_CTNCAG
32
+ NA PM_name
33
+ P0 A C G T
34
+ P1 1 2 3 4
35
+ P2 5 6 7 8",
36
+ result: Fabricate(:pm)
37
+ },
38
+
39
+ 'motif with additional rows' => {input: "
40
+ NA PM_name
41
+ P0 A C G T S P
42
+ P1 1 2 3 4 5 10
43
+ P2 5 6 7 8 5 11",
44
+ result: Fabricate(:pm)
45
+ },
46
+
47
+ 'string with more than 10 positions(2-digit row numbers)' => {input: "
48
+ NA PM_name
46
49
  P0 A C G T
47
- P1 0 1878368 0 0
48
- P2 0 0 0 1878368
49
- P3 469592 469592 469592 469592
50
- P4 0 1878368 0 0
51
- P5 1878368 0 0 0
52
- P6 0 0 1878368 0",
53
- matrix: [ [0.0, 1878368.0, 0.0, 0.0],
54
- [0.0, 0.0, 0.0, 1878368.0],
55
- [469592.0, 469592.0, 469592.0, 469592.0],
56
- [0.0, 1878368.0, 0.0, 0.0],
57
- [1878368.0, 0.0, 0.0, 0.0],
58
- [0.0, 0.0, 1878368.0, 0.0]],
59
- name: 'motif_CTNCAG'
50
+ P1 1 2 3 4
51
+ P2 5 6 7 8
52
+ P3 1 2 3 4
53
+ P4 5 6 7 8
54
+ P5 1 2 3 4
55
+ P6 5 6 7 8
56
+ P7 1 2 3 4
57
+ P8 5 6 7 8
58
+ P9 1 2 3 4
59
+ P10 5 6 7 8
60
+ P11 1 2 3 4
61
+ P12 5 6 7 8",
62
+ result: Fabricate(:pm, matrix: [[1,2,3,4],[5,6,7,8]] * 6 )
60
63
  }
61
64
  }
62
65
 
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/parsers/string_parser'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/parsers/string_parser'
3
3
 
4
4
  module Bioinform
5
5
  describe StringParser do
@@ -7,37 +7,35 @@ module Bioinform
7
7
  describe '#each' do
8
8
  it 'should yield consequent results of #parse! while it returns result' do
9
9
  parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
10
- expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
10
+ expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
11
+ OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
12
+ OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
11
13
  end
12
14
  it 'should restart parser from the beginning each time' do
13
15
  parser = StringParser.new("1 2 3 4\n5 6 7 8\n\n1 2 3 4\n1 2 3 4\nName\n4 3 2 1\n1 1 1 1\n0 0 0 0")
14
16
  3.times do
15
- expect{|b| parser.each(&b)}.to yield_successive_args({matrix:[[1,2,3,4],[5,6,7,8]], name:nil}, {matrix:[[1,2,3,4],[1,2,3,4]], name:nil}, {matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name'} )
17
+ expect{|b| parser.each(&b)}.to yield_successive_args(OpenStruct.new(matrix:[[1,2,3,4],[5,6,7,8]], name:nil),
18
+ OpenStruct.new(matrix:[[1,2,3,4],[1,2,3,4]], name:nil),
19
+ OpenStruct.new(matrix:[[4,3,2,1],[1,1,1,1],[0,0,0,0]], name:'Name') )
16
20
  end
17
21
  end
18
22
  end
19
23
 
20
- context '::split' do
24
+ context '::split_on_motifs' do
21
25
  it 'should be able to get a single PM' do
22
- StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12").should == [ {matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name:nil} ]
26
+ StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8").should == [ Fabricate(:pm_unnamed) ]
23
27
  end
24
-
25
28
  it 'should be able to split several PMs separated with an empty line' do
26
- StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:nil} ]
29
+ StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n\n 15 16 17 18 \n 11 21 31 41").should ==
30
+ [ Fabricate(:pm_first, name: nil), Fabricate(:pm_second, name: nil) ]
27
31
  end
28
-
29
32
  it 'should be able to split several PMs separated with name' do
30
- StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
31
-
32
- StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8\n\n\n").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
33
+ StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \nPM_second\n 15 16 17 18 \n 11 21 31 41").should ==
34
+ [ Fabricate(:pm_first, name: nil), Fabricate(:pm_second) ]
33
35
  end
34
- end
35
-
36
- context '::split_on_motifs' do
37
- it 'should be able to split string into PMs' do
38
- result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
39
- result.map{|pm| pm.matrix}.should == [ [[1,2,3,4],[5,6,7,8],[9,10,11,12]], [[9,10,11,12],[1,2,3,4],[5,6,7,8]] ]
40
- result.map{|pm| pm.name}.should == [nil, 'Name']
36
+ it 'should be able to split several PMs separated with both name and empty line' do
37
+ StringParser.split_on_motifs("PM_first\n1 2 3 4 \n 5 6 7 8 \n\nPM_second\n 15 16 17 18 \n 11 21 31 41\n\n\n").should ==
38
+ [ Fabricate(:pm_first), Fabricate(:pm_second) ]
41
39
  end
42
40
  it 'should create PMs by default' do
43
41
  result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
@@ -50,54 +48,23 @@ module Bioinform
50
48
  end
51
49
 
52
50
  good_cases = {
53
- 'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
54
- matrix: [[1,2,3,4],[5,6,7,8]] },
55
-
56
- '4xN string' => {input: "1 5\n2 6\n3 7\n 4 8",
57
- matrix: [[1,2,3,4],[5,6,7,8]] },
58
-
59
- 'string with name' => {input: "TestMatrix\n1 5\n2 6\n3 7\n 4 8",
60
- matrix: [[1,2,3,4],[5,6,7,8]], name: 'TestMatrix' },
61
-
62
- 'string with name (with introduction sign)' => {input: ">\t TestMatrix\n1 5\n2 6\n3 7\n 4 8",
63
- matrix: [[1,2,3,4],[5,6,7,8]],
64
- name: 'TestMatrix' },
65
-
51
+ 'Nx4 string' => {input: "1 2 3 4\n5 6 7 8", result: Fabricate(:pm_unnamed) },
52
+ '4xN string' => {input: "1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm_unnamed) },
53
+ 'string with name' => {input: "PM_name\n1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm) },
54
+ 'string with name (with introduction sign)' => {input: ">\t PM_name\n1 5\n2 6\n3 7\n 4 8", result: Fabricate(:pm) },
66
55
  'string with name (with special characters)' => {input: "Testmatrix_first:subname+sub-subname\n1 5\n2 6\n3 7\n 4 8",
67
- matrix: [[1,2,3,4],[5,6,7,8]], name: 'Testmatrix_first:subname+sub-subname' },
68
-
69
- 'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210",
70
- matrix: [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]},
71
-
72
- 'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3",
73
- matrix: [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]},
74
-
75
- 'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8",
76
- matrix: [[1,2,3,4],[5,6,7,8]] },
77
-
78
- 'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n",
79
- matrix: [[1,2,3,4],[5,6,7,8]] },
80
-
81
- 'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8",
82
- matrix: [[1,2,3,4],[5,6,7,8]] },
83
-
84
- 'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8",
85
- matrix: [[1,2,3,4],[5,6,7,8]] },
86
-
87
- 'Nx4 string with name and acgt-header' => {input: "Name\nA C G T\n1 2 3 4\n5 6 7 8",
88
- matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'},
89
-
90
- 'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8",
91
- matrix: [[1,2,3,4],[5,6,7,8]] },
92
-
93
- '4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n0 0 0 0\n2 2 2 2",
94
- matrix: [[1,2,3,4],[5,6,7,8],[0,0,0,0],[2,2,2,2]] },
95
-
96
- '4x4 string with acgt-row-markers' => {input: "A|1 2 3 4\nC|5 6 7 8\nG|0 0 0 0\nT|2 2 2 2",
97
- matrix: [[1,5,0,2],[2,6,0,2],[3,7,0,2],[4,8,0,2]] },
98
-
99
- '4x4 string with name and acgt-row-markers' => {input: "Name\nA:1 2 3 4\nC:5 6 7 8\nG:0 0 0 0\nT:2 2 2 2",
100
- matrix: [[1,5,0,2],[2,6,0,2],[3,7,0,2],[4,8,0,2]], name: 'Name' }
56
+ result: Fabricate(:pm, name: 'Testmatrix_first:subname+sub-subname') },
57
+ 'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210", result: Fabricate(:pm_with_floats) },
58
+ 'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3", result: Fabricate(:pm_with_floats) },
59
+ 'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8", result: Fabricate(:pm_unnamed) },
60
+ 'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n", result: Fabricate(:pm_unnamed) },
61
+ 'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8", result: Fabricate(:pm_unnamed) },
62
+ 'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8", result: Fabricate(:pm_unnamed) },
63
+ 'Nx4 string with name and acgt-header' => {input: "PM_name\nA C G T\n1 2 3 4\n5 6 7 8", result: Fabricate(:pm)},
64
+ 'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8", result: Fabricate(:pm_unnamed) },
65
+ '4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n9 10 11 12\n13 14 15 16", result: Fabricate(:pm_4x4_unnamed) },
66
+ '4x4 string with acgt-row-markers' => {input: "A|1 5 9 13\nC|2 6 10 14\nG|3 7 11 15\nT|4 8 12 16", result: Fabricate(:pm_4x4_unnamed) },
67
+ '4x4 string with name and acgt-row-markers' => {input: "PM_name\nA:1 5 9 13\nC:2 6 10 14\nG:3 7 11 15\nT:4 8 12 16", result: Fabricate(:pm_4x4) }
101
68
  }
102
69
 
103
70
  bad_cases = {
@@ -1,5 +1,6 @@
1
- require 'spec_helper'
2
- require 'bioinform/parsers/parser'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/parsers/parser'
3
+ require_relative '../../lib/bioinform/data_models/collection'
3
4
 
4
5
  module Bioinform
5
6
  describe TrivialParser do
@@ -8,15 +9,56 @@ module Bioinform
8
9
  TrivialParser.instance_method(:initialize).arity.should == 1
9
10
  end
10
11
  end
11
- context '#parser!' do
12
- it 'should return input of that was passed to initialize' do
13
- TrivialParser.new('stub input').parse!.should == 'stub input'
12
+
13
+ context '#parse!' do
14
+ it 'should return OpenStruct based on input of that was passed to initialize when input is a Hash' do
15
+ TrivialParser.new(matrix: 'stub matrix', name: 'stub name').parse!.should == OpenStruct.new(matrix: 'stub matrix', name: 'stub name')
16
+ end
17
+
18
+ it 'should return OpenStruct based on input of that was passed to initialize when input is a OpenStruct' do
19
+ TrivialParser.new(OpenStruct.new(matrix: 'stub matrix', name: 'stub name')).parse!.should == OpenStruct.new(matrix: 'stub matrix', name: 'stub name')
14
20
  end
15
21
  end
22
+
23
+ context '::split_on_motifs' do
24
+ it 'should be able to get a single PM' do
25
+ TrivialParser.split_on_motifs({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, PM).should == [ PM.new(matrix: [[1,2,3,4],[5,6,7,8]], name:'Name') ]
26
+ end
27
+ end
28
+
16
29
  it 'can be used to create PM with {matrix: ..., name: ...} form' do
17
30
  pm = PM.new({matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'}, TrivialParser)
18
31
  pm.matrix.should == [[1,2,3,4],[5,6,7,8]]
19
32
  pm.name.should == 'Name'
20
33
  end
34
+
35
+ it 'can be used to create PM from PM (make copy)' do
36
+ pm = Fabricate(:pm)
37
+ pm_copy = PM.new(pm, TrivialParser)
38
+ pm_copy.should == pm
39
+ end
40
+ end
41
+
42
+ describe TrivialCollectionParser do
43
+ before :each do
44
+ @pm_1 = Fabricate(:pm_first)
45
+ @pm_2 = Fabricate(:pm_second)
46
+ @collection = Fabricate(:two_elements_collection)
47
+ end
48
+
49
+ describe '#parse!' do
50
+ it 'can be used to obtain PMs from Collection' do
51
+ @parser = TrivialCollectionParser.new(@collection)
52
+ @parser.parse!.should == @pm_1
53
+ @parser.parse!.should == @pm_2
54
+ expect{ @parser.parse! }.to raise_error
55
+ end
56
+ end
57
+
58
+ describe '::split_on_motifs' do
59
+ it 'should be able to split collection into PMs' do
60
+ TrivialCollectionParser.split_on_motifs(@collection).should == [@pm_1, @pm_2]
61
+ end
62
+ end
21
63
  end
22
- end
64
+ end
@@ -0,0 +1,50 @@
1
+ require 'yaml'
2
+ require_relative '../spec_helper'
3
+ require_relative '../../lib/bioinform/parsers/yaml_parser'
4
+ require_relative '../../lib/bioinform/data_models/collection'
5
+
6
+ module Bioinform
7
+ describe YAMLParser do
8
+ context '#parse!' do
9
+ it 'should return PM that was encoded in YAML format' do
10
+ pm = Fabricate(:pm)
11
+ parser = YAMLParser.new(pm.to_yaml)
12
+ parser.parse!.should == pm
13
+ end
14
+ end
15
+ it 'can be used to create PM from yaml-string' do
16
+ pm = Fabricate(:pm)
17
+ pm_copy = PM.new(pm.to_yaml, YAMLParser)
18
+ pm_copy.should == pm
19
+ end
20
+
21
+ context '::split_on_motifs' do
22
+ it 'should be able to get a single PM' do
23
+ pm = Fabricate(:pm)
24
+ YAMLParser.split_on_motifs(pm.to_yaml, PM).should == [ pm ]
25
+ end
26
+ end
27
+ end
28
+
29
+ describe YAMLCollectionParser do
30
+ before :each do
31
+ @pm_1 = Fabricate(:pm_first)
32
+ @pm_2 = Fabricate(:pm_second)
33
+ @collection = Collection.new
34
+ @collection << @pm_1 << @pm_2
35
+ end
36
+ context '::split_on_motifs' do
37
+ it 'should be able to split collection into PMs' do
38
+ YAMLCollectionParser.split_on_motifs(@collection.to_yaml).should == [@pm_1, @pm_2]
39
+ end
40
+ end
41
+ context '#parse!' do
42
+ it 'should return PMs which were in encoded YAML format' do
43
+ @parser = YAMLCollectionParser.new(@collection.to_yaml)
44
+ @parser.parse!.should == @pm_1
45
+ @parser.parse!.should == @pm_2
46
+ expect{ @parser.parse! }.to raise_error
47
+ end
48
+ end
49
+ end
50
+ end
data/spec/spec_helper.rb CHANGED
@@ -5,6 +5,7 @@ require 'rspec'
5
5
 
6
6
  require 'fileutils'
7
7
  require 'stringio'
8
+ require 'fabrication'
8
9
 
9
10
  # from minitest
10
11
  def capture_io(&block)
@@ -39,12 +40,7 @@ def parser_specs(parser_klass, good_cases, bad_cases)
39
40
  good_cases.each do |case_description, input_and_result|
40
41
  it "should be able to parse #{case_description}" do
41
42
  result = parser_klass.new(input_and_result[:input]).parse
42
- result[:matrix].should == input_and_result[:matrix]
43
- if input_and_result.has_key?(:name)
44
- result[:name].should == input_and_result[:name]
45
- else
46
- result[:name].should be_nil
47
- end
43
+ Bioinform::PM.new(result).should == input_and_result[:result]
48
44
  end
49
45
  end
50
46
 
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/advanced_scan'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/advanced_scan'
3
3
 
4
4
  describe StringScanner do
5
5
  context '#advanced_scan' do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/array_product'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/array_product'
3
3
 
4
4
  describe Array do
5
5
  context '::product' do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/array_zip'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/array_zip'
3
3
 
4
4
  describe Array do
5
5
  context '::zip' do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/collect_hash'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/collect_hash'
3
3
 
4
4
  describe Enumerable do
5
5
  # %w{A C G T}.collect_hash{|k| [k*2, k*3] }
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/delete_many'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/delete_many'
3
3
 
4
4
  describe Array do
5
5
  before :each do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/inverf'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/inverf'
3
3
 
4
4
  describe 'Math#inverf' do
5
5
  it 'should be erf(inverf(x)) == x' do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/multiline_squish'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/multiline_squish'
3
3
 
4
4
  describe String do
5
5
  describe '#multiline_squish' do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/partial_sums'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/partial_sums'
3
3
 
4
4
  describe 'Array#partial_sums' do
5
5
  context 'when no initial value given' do
@@ -1,5 +1,5 @@
1
- require 'spec_helper'
2
- require 'bioinform/support/same_by'
1
+ require_relative '../spec_helper'
2
+ require_relative '../../lib/bioinform/support/same_by'
3
3
 
4
4
  describe Enumerable do
5
5
  describe '#same_by?' do