bioinform 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TODO.txt +10 -5
- data/bioinform.gemspec +1 -1
- data/lib/bioinform/cli/pcm2pwm.rb +6 -6
- data/lib/bioinform/cli/split_motifs.rb +7 -7
- data/lib/bioinform/data_models.rb +2 -0
- data/lib/bioinform/data_models/collection.rb +85 -1
- data/lib/bioinform/data_models/pcm.rb +8 -5
- data/lib/bioinform/data_models/pm.rb +54 -39
- data/lib/bioinform/data_models/pwm.rb +3 -3
- data/lib/bioinform/parsers/parser.rb +11 -11
- data/lib/bioinform/parsers/string_fantom_parser.rb +23 -2
- data/lib/bioinform/parsers/string_parser.rb +16 -16
- data/lib/bioinform/support/collect_hash.rb +1 -1
- data/lib/bioinform/version.rb +1 -1
- data/spec/cli/data/KLF4 f2 spaced name.pcm +11 -0
- data/spec/cli/data/KLF4_f2.pcm +11 -0
- data/spec/cli/data/KLF4_f2.pwm.result +11 -0
- data/spec/cli/data/SP1_f1.pcm +12 -0
- data/spec/cli/data/SP1_f1.pwm.result +12 -0
- data/spec/cli/pcm2pwm_spec.rb +74 -0
- data/spec/data_models/collection_spec.rb +96 -0
- data/spec/data_models/pcm_spec.rb +5 -5
- data/spec/data_models/pm_spec.rb +136 -30
- data/spec/data_models/ppm_spec.rb +1 -1
- data/spec/data_models/pwm_spec.rb +2 -2
- data/spec/parsers/parser_spec.rb +26 -26
- data/spec/parsers/string_fantom_parser_spec.rb +52 -15
- data/spec/parsers/string_parser_spec.rb +34 -34
- data/spec/spec_helper.rb +32 -1
- data/spec/support/delete_many_spec.rb +2 -2
- metadata +14 -2
@@ -1,30 +1,67 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
require 'bioinform/parsers/string_fantom_parser'
|
3
3
|
|
4
|
-
module Bioinform
|
4
|
+
module Bioinform
|
5
5
|
describe StringFantomParser do
|
6
|
+
describe '#parse' do
|
7
|
+
it 'should be able to parse several motifs' do
|
8
|
+
input = <<-EOS
|
9
|
+
//
|
10
|
+
NA motif_1
|
11
|
+
P0 A C G T
|
12
|
+
P1 0 1 2 3
|
13
|
+
P2 4 5 6 7
|
14
|
+
//
|
15
|
+
//
|
16
|
+
NA motif_2
|
17
|
+
P0 A C G T
|
18
|
+
P1 1 2 3 4
|
19
|
+
P2 5 6 7 8
|
20
|
+
P3 9 10 11 12
|
21
|
+
//
|
22
|
+
NA motif_3
|
23
|
+
P0 A C G T
|
24
|
+
P1 2 3 4 5
|
25
|
+
P2 6 7 8 9
|
26
|
+
EOS
|
27
|
+
StringFantomParser.split(input).should == [ {matrix: [[0,1,2,3],[4,5,6,7]], name: 'motif_1'},
|
28
|
+
{matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name: 'motif_2'},
|
29
|
+
{matrix: [[2,3,4,5],[6,7,8,9]], name: 'motif_3'} ]
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should be able to parse motif with additional rows' do
|
33
|
+
input = <<-EOS
|
34
|
+
NA motif_1
|
35
|
+
P0 A C G T S P
|
36
|
+
P1 0 1 2 3 5 10
|
37
|
+
P2 4 5 6 7 5 11
|
38
|
+
EOS
|
39
|
+
StringFantomParser.split(input).should == [ {matrix: [[0,1,2,3],[4,5,6,7]], name: 'motif_1'} ]
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
6
43
|
good_cases = {
|
7
44
|
'string in Fantom-format' => {input: "
|
8
45
|
NA motif_CTNCAG
|
9
|
-
P0 A C G T
|
10
|
-
P1 0 1878368 0 0
|
11
|
-
P2 0 0 0 1878368
|
12
|
-
P3 469592 469592 469592 469592
|
13
|
-
P4 0 1878368 0 0
|
14
|
-
P5 1878368 0 0 0
|
46
|
+
P0 A C G T
|
47
|
+
P1 0 1878368 0 0
|
48
|
+
P2 0 0 0 1878368
|
49
|
+
P3 469592 469592 469592 469592
|
50
|
+
P4 0 1878368 0 0
|
51
|
+
P5 1878368 0 0 0
|
15
52
|
P6 0 0 1878368 0",
|
16
|
-
matrix: [ [0.0, 1878368.0, 0.0, 0.0],
|
17
|
-
[0.0, 0.0, 0.0, 1878368.0],
|
18
|
-
[469592.0, 469592.0, 469592.0, 469592.0],
|
19
|
-
[0.0, 1878368.0, 0.0, 0.0],
|
20
|
-
[1878368.0, 0.0, 0.0, 0.0],
|
53
|
+
matrix: [ [0.0, 1878368.0, 0.0, 0.0],
|
54
|
+
[0.0, 0.0, 0.0, 1878368.0],
|
55
|
+
[469592.0, 469592.0, 469592.0, 469592.0],
|
56
|
+
[0.0, 1878368.0, 0.0, 0.0],
|
57
|
+
[1878368.0, 0.0, 0.0, 0.0],
|
21
58
|
[0.0, 0.0, 1878368.0, 0.0]],
|
22
|
-
name: 'motif_CTNCAG'
|
59
|
+
name: 'motif_CTNCAG'
|
23
60
|
}
|
24
61
|
}
|
25
|
-
|
62
|
+
|
26
63
|
bad_cases = { }
|
27
|
-
|
64
|
+
|
28
65
|
parser_specs(StringFantomParser, good_cases, bad_cases)
|
29
66
|
end
|
30
67
|
end
|
@@ -16,23 +16,23 @@ module Bioinform
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
context '::split' do
|
21
21
|
it 'should be able to get a single PM' do
|
22
22
|
StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12").should == [ {matrix: [[1,2,3,4],[5,6,7,8],[9,10,11,12]], name:nil} ]
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
it 'should be able to split several PMs separated with an empty line' do
|
26
26
|
StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:nil} ]
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
it 'should be able to split several PMs separated with name' do
|
30
30
|
StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
|
31
|
-
|
31
|
+
|
32
32
|
StringParser.split("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \n\nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8\n\n\n").should == [ {matrix:[[1,2,3,4],[5,6,7,8],[9,10,11,12]],name:nil}, {matrix:[[9,10,11,12],[1,2,3,4],[5,6,7,8]],name:'Name'} ]
|
33
33
|
end
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
36
|
context '::split_on_motifs' do
|
37
37
|
it 'should be able to split string into PMs' do
|
38
38
|
result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8")
|
@@ -45,66 +45,66 @@ module Bioinform
|
|
45
45
|
end
|
46
46
|
it 'should create PM subclass when it\'s specified' do
|
47
47
|
result = StringParser.split_on_motifs("1 2 3 4 \n 5 6 7 8 \n 9 10 11 12 \nName\n 9 10 11 12 \n 1 2 3 4 \n 5 6 7 8", PWM)
|
48
|
-
result.each{|pm| pm.class.should == PWM}
|
48
|
+
result.each{|pm| pm.class.should == PWM}
|
49
49
|
end
|
50
50
|
end
|
51
|
-
|
51
|
+
|
52
52
|
good_cases = {
|
53
|
-
'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
|
53
|
+
'Nx4 string' => {input: "1 2 3 4\n5 6 7 8",
|
54
54
|
matrix: [[1,2,3,4],[5,6,7,8]] },
|
55
|
-
|
56
|
-
'4xN string' => {input: "1 5\n2 6\n3 7\n 4 8",
|
55
|
+
|
56
|
+
'4xN string' => {input: "1 5\n2 6\n3 7\n 4 8",
|
57
57
|
matrix: [[1,2,3,4],[5,6,7,8]] },
|
58
|
-
|
59
|
-
'string with name' => {input: "TestMatrix\n1 5\n2 6\n3 7\n 4 8",
|
58
|
+
|
59
|
+
'string with name' => {input: "TestMatrix\n1 5\n2 6\n3 7\n 4 8",
|
60
60
|
matrix: [[1,2,3,4],[5,6,7,8]], name: 'TestMatrix' },
|
61
|
-
|
61
|
+
|
62
62
|
'string with name (with introduction sign)' => {input: ">\t TestMatrix\n1 5\n2 6\n3 7\n 4 8",
|
63
63
|
matrix: [[1,2,3,4],[5,6,7,8]],
|
64
64
|
name: 'TestMatrix' },
|
65
|
-
|
66
|
-
'string with name (with special characters)' => {input: "Testmatrix_first:subname+sub-subname\n1 5\n2 6\n3 7\n 4 8",
|
65
|
+
|
66
|
+
'string with name (with special characters)' => {input: "Testmatrix_first:subname+sub-subname\n1 5\n2 6\n3 7\n 4 8",
|
67
67
|
matrix: [[1,2,3,4],[5,6,7,8]], name: 'Testmatrix_first:subname+sub-subname' },
|
68
68
|
|
69
69
|
'string with float numerics' => {input: "1.23 4.56 7.8 9.0\n9 -8.7 6.54 -3210",
|
70
70
|
matrix: [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]},
|
71
|
-
|
71
|
+
|
72
72
|
'string with exponents' => {input: "123e-2 0.456e+1 7.8 9.0\n9 -87000000000E-10 6.54 -3.210e3",
|
73
73
|
matrix: [[1.23, 4.56, 7.8, 9.0], [9, -8.7, 6.54, -3210]]},
|
74
74
|
|
75
|
-
'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8",
|
75
|
+
'string with multiple spaces and tabs' => {input: "1 \t\t 2 3 4\n 5 6 7 8",
|
76
76
|
matrix: [[1,2,3,4],[5,6,7,8]] },
|
77
|
-
|
78
|
-
'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n",
|
77
|
+
|
78
|
+
'string with preceeding and terminating newlines' => {input: "\n\n\t 1 2 3 4\n5 6 7 8 \n\t\n",
|
79
79
|
matrix: [[1,2,3,4],[5,6,7,8]] },
|
80
|
-
|
80
|
+
|
81
81
|
'string with windows crlf' => {input: "1 2 3 4\r\n5 6 7 8",
|
82
82
|
matrix: [[1,2,3,4],[5,6,7,8]] },
|
83
|
-
|
84
|
-
'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8",
|
83
|
+
|
84
|
+
'Nx4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8",
|
85
85
|
matrix: [[1,2,3,4],[5,6,7,8]] },
|
86
|
-
|
87
|
-
'Nx4 string with name and acgt-header' => {input: "Name\nA C G T\n1 2 3 4\n5 6 7 8",
|
86
|
+
|
87
|
+
'Nx4 string with name and acgt-header' => {input: "Name\nA C G T\n1 2 3 4\n5 6 7 8",
|
88
88
|
matrix: [[1,2,3,4],[5,6,7,8]], name: 'Name'},
|
89
|
-
|
90
|
-
'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8",
|
89
|
+
|
90
|
+
'Nx4 string with acgt-row-markers' => {input: "A 1 5\nC : 2 6\nG3 7\nT |4 8",
|
91
91
|
matrix: [[1,2,3,4],[5,6,7,8]] },
|
92
|
-
|
93
|
-
'4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n0 0 0 0\n2 2 2 2",
|
92
|
+
|
93
|
+
'4x4 string with acgt-header' => {input: "A C G T\n1 2 3 4\n5 6 7 8\n0 0 0 0\n2 2 2 2",
|
94
94
|
matrix: [[1,2,3,4],[5,6,7,8],[0,0,0,0],[2,2,2,2]] },
|
95
|
-
|
96
|
-
'4x4 string with acgt-row-markers' => {input: "A|1 2 3 4\nC|5 6 7 8\nG|0 0 0 0\nT|2 2 2 2",
|
95
|
+
|
96
|
+
'4x4 string with acgt-row-markers' => {input: "A|1 2 3 4\nC|5 6 7 8\nG|0 0 0 0\nT|2 2 2 2",
|
97
97
|
matrix: [[1,5,0,2],[2,6,0,2],[3,7,0,2],[4,8,0,2]] },
|
98
|
-
|
99
|
-
'4x4 string with name and acgt-row-markers' => {input: "Name\nA:1 2 3 4\nC:5 6 7 8\nG:0 0 0 0\nT:2 2 2 2",
|
98
|
+
|
99
|
+
'4x4 string with name and acgt-row-markers' => {input: "Name\nA:1 2 3 4\nC:5 6 7 8\nG:0 0 0 0\nT:2 2 2 2",
|
100
100
|
matrix: [[1,5,0,2],[2,6,0,2],[3,7,0,2],[4,8,0,2]], name: 'Name' }
|
101
101
|
}
|
102
|
-
|
102
|
+
|
103
103
|
bad_cases = {
|
104
104
|
'string with non-numeric input' => {input: "1.23 4.56 78aaa 9.0\n9 -8.7 6.54 -3210" },
|
105
105
|
'string with empty exponent sign' => {input: "1.23 4.56 7.8 9.0\n 9e -8.7 6.54 3210" }
|
106
106
|
}
|
107
|
-
|
107
|
+
|
108
108
|
parser_specs(StringParser, good_cases, bad_cases)
|
109
109
|
end
|
110
110
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -3,6 +3,37 @@ $LOAD_PATH.unshift File.dirname(__FILE__)
|
|
3
3
|
|
4
4
|
require 'rspec'
|
5
5
|
|
6
|
+
require 'fileutils'
|
7
|
+
require 'stringio'
|
8
|
+
|
9
|
+
# from minitest
|
10
|
+
def capture_io(&block)
|
11
|
+
orig_stdout, orig_stderr = $stdout, $stderr
|
12
|
+
captured_stdout, captured_stderr = StringIO.new, StringIO.new
|
13
|
+
$stdout, $stderr = captured_stdout, captured_stderr
|
14
|
+
yield
|
15
|
+
return {stdout: captured_stdout.string, stderr: captured_stderr.string}
|
16
|
+
ensure
|
17
|
+
$stdout = orig_stdout
|
18
|
+
$stderr = orig_stderr
|
19
|
+
end
|
20
|
+
|
21
|
+
# Method stubs $stdin not STDIN !
|
22
|
+
def provide_stdin(input, &block)
|
23
|
+
orig_stdin = $stdin
|
24
|
+
$stdin = StringIO.new(input)
|
25
|
+
yield
|
26
|
+
ensure
|
27
|
+
$stdin = orig_stdin
|
28
|
+
end
|
29
|
+
|
30
|
+
def capture_output(&block)
|
31
|
+
capture_io(&block)[:stdout]
|
32
|
+
end
|
33
|
+
def capture_stderr(&block)
|
34
|
+
capture_io(&block)[:stderr]
|
35
|
+
end
|
36
|
+
|
6
37
|
def parser_specs(parser_klass, good_cases, bad_cases)
|
7
38
|
context '#parse!' do
|
8
39
|
good_cases.each do |case_description, input_and_result|
|
@@ -11,7 +42,7 @@ def parser_specs(parser_klass, good_cases, bad_cases)
|
|
11
42
|
result[:matrix].should == input_and_result[:matrix]
|
12
43
|
if input_and_result.has_key?(:name)
|
13
44
|
result[:name].should == input_and_result[:name]
|
14
|
-
else
|
45
|
+
else
|
15
46
|
result[:name].should be_nil
|
16
47
|
end
|
17
48
|
end
|
@@ -2,7 +2,7 @@ require 'spec_helper'
|
|
2
2
|
require 'bioinform/support/delete_many'
|
3
3
|
|
4
4
|
describe Array do
|
5
|
-
before :each do
|
5
|
+
before :each do
|
6
6
|
@arr = %w{a b c d e f g h i j b b}
|
7
7
|
end
|
8
8
|
describe '#delete_at_many' do
|
@@ -32,7 +32,7 @@ describe Array do
|
|
32
32
|
end
|
33
33
|
|
34
34
|
describe Hash do
|
35
|
-
before :each do
|
35
|
+
before :each do
|
36
36
|
@arr = {A: 3, T: 6, G: 4, C: 5}
|
37
37
|
end
|
38
38
|
describe '#delete_many' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bioinform
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-09-
|
12
|
+
date: 2012-09-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -89,8 +89,14 @@ files:
|
|
89
89
|
- lib/bioinform/support/same_by.rb
|
90
90
|
- lib/bioinform/version.rb
|
91
91
|
- spec/cli/cli_spec.rb
|
92
|
+
- spec/cli/data/KLF4 f2 spaced name.pcm
|
93
|
+
- spec/cli/data/KLF4_f2.pcm
|
94
|
+
- spec/cli/data/KLF4_f2.pwm.result
|
95
|
+
- spec/cli/data/SP1_f1.pcm
|
96
|
+
- spec/cli/data/SP1_f1.pwm.result
|
92
97
|
- spec/cli/pcm2pwm_spec.rb
|
93
98
|
- spec/cli/split_motifs_spec.rb
|
99
|
+
- spec/data_models/collection_spec.rb
|
94
100
|
- spec/data_models/pcm_spec.rb
|
95
101
|
- spec/data_models/pm_spec.rb
|
96
102
|
- spec/data_models/ppm_spec.rb
|
@@ -138,8 +144,14 @@ summary: Classes for work with different input formats of positional matrices an
|
|
138
144
|
symbols
|
139
145
|
test_files:
|
140
146
|
- spec/cli/cli_spec.rb
|
147
|
+
- spec/cli/data/KLF4 f2 spaced name.pcm
|
148
|
+
- spec/cli/data/KLF4_f2.pcm
|
149
|
+
- spec/cli/data/KLF4_f2.pwm.result
|
150
|
+
- spec/cli/data/SP1_f1.pcm
|
151
|
+
- spec/cli/data/SP1_f1.pwm.result
|
141
152
|
- spec/cli/pcm2pwm_spec.rb
|
142
153
|
- spec/cli/split_motifs_spec.rb
|
154
|
+
- spec/data_models/collection_spec.rb
|
143
155
|
- spec/data_models/pcm_spec.rb
|
144
156
|
- spec/data_models/pm_spec.rb
|
145
157
|
- spec/data_models/ppm_spec.rb
|