macroape 3.3.7 → 3.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +2 -2
- data/Rakefile.rb +6 -6
- data/TODO.txt +23 -3
- data/benchmark/similarity_benchmark.rb +18 -18
- data/lib/macroape/aligned_pair_intersection.rb +4 -4
- data/lib/macroape/cli/align_motifs.rb +34 -28
- data/lib/macroape/cli/eval_alignment.rb +73 -47
- data/lib/macroape/cli/eval_similarity.rb +65 -40
- data/lib/macroape/cli/find_pvalue.rb +30 -34
- data/lib/macroape/cli/find_threshold.rb +52 -41
- data/lib/macroape/cli/preprocess_collection.rb +68 -58
- data/lib/macroape/cli/scan_collection.rb +89 -73
- data/lib/macroape/cli.rb +184 -1
- data/lib/macroape/counting.rb +31 -5
- data/lib/macroape/pwm_compare.rb +8 -2
- data/lib/macroape/pwm_compare_aligned.rb +15 -10
- data/lib/macroape/version.rb +2 -1
- data/macroape.gemspec +2 -1
- data/spec/count_distribution_spec.rb +11 -11
- data/test/align_motifs_test.rb +16 -4
- data/test/data/{AHR_si.pat → AHR_si.pwm} +0 -0
- data/test/data/{KLF3_f1.pat → KLF3_f1.pwm} +0 -0
- data/test/data/{KLF4_f2.pat → KLF4_f2.pwm} +0 -0
- data/test/data/KLF4_f2_scan_results_all.txt +1 -2
- data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -2
- data/test/data/KLF4_f2_scan_results_precise_mode.txt +1 -2
- data/test/data/KLF4_f2_scan_results_weak_threshold.txt +2 -0
- data/test/data/{SP1_f1.pat → SP1_f1.pwm} +0 -0
- data/test/data/{SP1_f1_revcomp.pat → SP1_f1_revcomp.pwm} +0 -0
- data/test/data/collection_pcm_without_thresholds.yaml +186 -183
- data/test/data/collection_without_thresholds.yaml +186 -183
- data/test/data/{medium_motif.pat → medium_motif.pwm} +0 -0
- data/test/data/{short_motif.pat → short_motif.pwm} +0 -0
- data/test/data/test_collection/{GABPA_f1.pat → GABPA_f1.pwm} +0 -0
- data/test/data/test_collection/{KLF4_f2.pat → KLF4_f2.pwm} +0 -0
- data/test/data/test_collection/{SP1_f1.pat → SP1_f1.pwm} +0 -0
- data/test/data/test_collection.yaml +179 -176
- data/test/data/test_collection_weak.yaml +214 -0
- data/test/eval_alignment_test.rb +97 -21
- data/test/eval_similarity_test.rb +104 -26
- data/test/find_pvalue_test.rb +22 -9
- data/test/find_threshold_test.rb +76 -25
- data/test/preprocess_collection_test.rb +16 -21
- data/test/scan_collection_test.rb +26 -14
- data/test/test_helper.rb +96 -12
- metadata +44 -24
@@ -6,46 +6,46 @@ describe Bioinform::PWM do
|
|
6
6
|
let :matrix_second do [[1,2,3,4],[2,3,4,5]] end
|
7
7
|
let :pwm_first do Bioinform::PWM.new(matrix_first) end
|
8
8
|
let :pwm_second do Bioinform::PWM.new(matrix_second) end
|
9
|
-
|
9
|
+
|
10
10
|
context '#count_distribution_after_threshold' do
|
11
|
-
|
11
|
+
|
12
12
|
it 'should return hash of score => count for all scores >= threshold' do
|
13
13
|
distribution_first = pwm_first.count_distribution_after_threshold(0)
|
14
14
|
distribution_first.keys.should == Array.product(*matrix_first).map{|score_row| score_row.inject(&:+)}
|
15
15
|
distribution_first.values.uniq.should == [1]
|
16
|
-
|
16
|
+
|
17
17
|
distribution_second = pwm_second.count_distribution_after_threshold(0)
|
18
18
|
distribution_second.should == { 3=>1, 4=>2, 5=>3, 6=>4, 7=>3, 8=>2, 9=>1 }
|
19
|
-
|
19
|
+
|
20
20
|
distribution_second = pwm_second.count_distribution_after_threshold(5)
|
21
21
|
distribution_second.should == { 5=>3, 6=>4, 7=>3, 8=>2, 9=>1 }
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
it 'should use existing precalculated hash @count_distribution if it exists' do
|
25
25
|
pwm = pwm_second;
|
26
26
|
pwm.instance_variable_set :@count_distribution, { 3=>10, 4=>20, 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
|
27
|
-
|
27
|
+
|
28
28
|
distribution_second = pwm.count_distribution_after_threshold(0)
|
29
29
|
distribution_second.should == { 3=>10, 4=>20, 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
|
30
|
-
|
30
|
+
|
31
31
|
distribution_second = pwm.count_distribution_after_threshold(5)
|
32
32
|
distribution_second.should == { 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
|
33
33
|
end
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
36
|
context '#count_distribution' do
|
37
37
|
it 'should return hash of score => count for all available scores' do
|
38
38
|
pwm_second.count_distribution.should == { 3=>1, 4=>2, 5=>3, 6=>4, 7=>3, 8=>2, 9=>1 }
|
39
39
|
end
|
40
|
-
|
40
|
+
|
41
41
|
it 'should cache calculation in @count_distribution' do
|
42
42
|
pwm = pwm_second;
|
43
43
|
pwm.instance_variable_set :@count_distribution, { 3=>10, 4=>20, 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
|
44
44
|
pwm.count_distribution.should == { 3=>10, 4=>20, 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
|
45
|
-
|
45
|
+
|
46
46
|
pwm.instance_variable_set :@count_distribution, nil
|
47
47
|
pwm.count_distribution.should == { 3=>1, 4=>2, 5=>3, 6=>4, 7=>3, 8=>2, 9=>1 }
|
48
48
|
end
|
49
49
|
end
|
50
|
-
|
50
|
+
|
51
51
|
end
|
data/test/align_motifs_test.rb
CHANGED
@@ -1,12 +1,24 @@
|
|
1
1
|
require_relative 'test_helper'
|
2
2
|
|
3
3
|
class TestAlignmotifs < Test::Unit::TestCase
|
4
|
+
def setup
|
5
|
+
@start_dir = Dir.pwd
|
6
|
+
Dir.chdir File.join(File.dirname(__FILE__), 'data')
|
7
|
+
end
|
8
|
+
def teardown
|
9
|
+
Dir.chdir(@start_dir)
|
10
|
+
end
|
11
|
+
|
4
12
|
def test_align_motifs
|
5
|
-
assert_equal
|
6
|
-
|
13
|
+
assert_equal [%w[KLF4_f2.pwm 0 direct],
|
14
|
+
%w[KLF3_f1.pwm -4 direct],
|
15
|
+
%w[SP1_f1_revcomp.pwm -1 revcomp]],
|
16
|
+
Helpers.align_motifs_output('KLF4_f2.pwm KLF3_f1.pwm SP1_f1_revcomp.pwm')
|
7
17
|
end
|
8
18
|
def test_align_pcm_motifs
|
9
|
-
assert_equal
|
10
|
-
|
19
|
+
assert_equal [%w[KLF4_f2.pcm 0 direct],
|
20
|
+
%w[KLF3_f1.pcm -4 direct],
|
21
|
+
%w[SP1_f1_revcomp.pcm -1 revcomp]],
|
22
|
+
Helpers.align_motifs_output('--pcm KLF4_f2.pcm KLF3_f1.pcm SP1_f1_revcomp.pcm')
|
11
23
|
end
|
12
24
|
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -1,185 +1,188 @@
|
|
1
|
-
---
|
2
|
-
|
3
|
-
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
:
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
1
|
+
--- !ruby/object:Bioinform::Collection
|
2
|
+
container:
|
3
|
+
- !ruby/object:Bioinform::Motif
|
4
|
+
parameters: !ruby/object:OpenStruct
|
5
|
+
table:
|
6
|
+
:original_data_model: :pcm
|
7
|
+
:pcm: !ruby/object:Bioinform::PCM
|
8
|
+
parameters: !ruby/object:OpenStruct
|
9
|
+
table:
|
10
|
+
:name: GABPA_f1
|
11
|
+
:background:
|
12
|
+
- 1
|
13
|
+
- 1
|
14
|
+
- 1
|
15
|
+
- 1
|
16
|
+
modifiable: true
|
17
|
+
matrix:
|
18
|
+
- - 615.2572649050138
|
19
|
+
- 697.0698715160123
|
20
|
+
- 1261.1903440712872
|
21
|
+
- 176.43506582414153
|
22
|
+
- - 996.4929869323321
|
23
|
+
- 805.1878697364007
|
24
|
+
- 693.7695793644275
|
25
|
+
- 254.5021102832924
|
26
|
+
- - 1106.9888035794224
|
27
|
+
- 508.19444415177276
|
28
|
+
- 1029.8329748714536
|
29
|
+
- 104.93632371380718
|
30
|
+
- - 143.7121486195701
|
31
|
+
- 2086.4279160661263
|
32
|
+
- 518.37507049306
|
33
|
+
- 1.4374111377025893
|
34
|
+
- - 362.9541452731307
|
35
|
+
- 2369.473894845734
|
36
|
+
- 17.23702397004065
|
37
|
+
- 0.2874822275405179
|
38
|
+
- - 0.0
|
39
|
+
- 0.0
|
40
|
+
- 2749.952546316428
|
41
|
+
- 0.0
|
42
|
+
- - 0.0
|
43
|
+
- 0.0
|
44
|
+
- 2749.952546316428
|
45
|
+
- 0.0
|
46
|
+
- - 2748.2567506938462
|
47
|
+
- 1.695795622582083
|
48
|
+
- 0.0
|
49
|
+
- 0.0
|
50
|
+
- - 2726.6484322711017
|
51
|
+
- 1.1499289101620715
|
52
|
+
- 1.1499289101620715
|
53
|
+
- 21.00425622500253
|
54
|
+
- - 202.05697400573305
|
55
|
+
- 28.799402471063658
|
56
|
+
- 2518.808687612104
|
57
|
+
- 0.2874822275405179
|
58
|
+
- - 172.92889618879767
|
59
|
+
- 521.1240363384483
|
60
|
+
- 106.38197600987633
|
61
|
+
- 1949.517637779338
|
62
|
+
- - 398.1679460365911
|
63
|
+
- 424.20938204069563
|
64
|
+
- 1706.4024212088275
|
65
|
+
- 221.17279703034018
|
66
|
+
- - 764.2587933951809
|
67
|
+
- 675.0883944902433
|
68
|
+
- 1066.5413633225007
|
69
|
+
- 244.06399510852864
|
70
|
+
modifiable: true
|
71
|
+
- !ruby/object:Bioinform::Motif
|
72
|
+
parameters: !ruby/object:OpenStruct
|
73
|
+
table:
|
74
|
+
:original_data_model: :pcm
|
75
|
+
:pcm: !ruby/object:Bioinform::PCM
|
76
|
+
parameters: !ruby/object:OpenStruct
|
77
|
+
table:
|
78
|
+
:name: KLF4_f2
|
79
|
+
:background:
|
80
|
+
- 1
|
81
|
+
- 1
|
82
|
+
- 1
|
83
|
+
- 1
|
84
|
+
modifiable: true
|
85
|
+
matrix:
|
86
|
+
- - 1233.46088405354
|
87
|
+
- 93.18173277811673
|
88
|
+
- 1036.6014857092885
|
89
|
+
- 1258.2948629970272
|
90
|
+
- - 263.979242343185
|
91
|
+
- 5.314520555872139
|
92
|
+
- 3347.5949971525274
|
93
|
+
- 4.650205486388122
|
94
|
+
- - 76.7700780003465
|
95
|
+
- 6.643150694840173
|
96
|
+
- 3529.4896409394937
|
97
|
+
- 8.636095903292224
|
98
|
+
- - 57.86097393406657
|
99
|
+
- 18.102585643439472
|
100
|
+
- 3520.3342027139347
|
101
|
+
- 25.24120324653207
|
102
|
+
- - 518.1947904009378
|
103
|
+
- 1545.9062946905135
|
104
|
+
- 22.396758181071043
|
105
|
+
- 1535.0411222654507
|
106
|
+
- - 137.98151691820345
|
107
|
+
- 9.300410972776241
|
108
|
+
- 3456.320530770924
|
109
|
+
- 17.936506876068467
|
110
|
+
- - 115.27647661640499
|
111
|
+
- 81.51802997128804
|
112
|
+
- 1861.9425868567278
|
113
|
+
- 1562.801872093553
|
114
|
+
- - 227.8095486111286
|
115
|
+
- 42.84555258785854
|
116
|
+
- 3278.6396005325996
|
117
|
+
- 72.244263806387
|
118
|
+
- - 108.73384179997886
|
119
|
+
- 134.47328134862394
|
120
|
+
- 3162.880454846513
|
121
|
+
- 215.45138754285665
|
122
|
+
- - 238.49636899561344
|
123
|
+
- 2225.9561104691043
|
124
|
+
- 402.40727964384774
|
125
|
+
- 754.6792064294074
|
126
|
+
modifiable: true
|
127
|
+
- !ruby/object:Bioinform::Motif
|
128
|
+
parameters: !ruby/object:OpenStruct
|
129
|
+
table:
|
130
|
+
:original_data_model: :pcm
|
131
|
+
:pcm: !ruby/object:Bioinform::PCM
|
132
|
+
parameters: !ruby/object:OpenStruct
|
133
|
+
table:
|
134
|
+
:name: SP1_f1
|
135
|
+
:background:
|
136
|
+
- 1
|
137
|
+
- 1
|
138
|
+
- 1
|
139
|
+
- 1
|
140
|
+
modifiable: true
|
141
|
+
matrix:
|
142
|
+
- - 682.6436366358055
|
143
|
+
- 443.1455214015781
|
144
|
+
- 2075.655346294993
|
145
|
+
- 287.211468117951
|
146
|
+
- - 299.8883246804867
|
147
|
+
- 103.74338315843572
|
148
|
+
- 2613.8927022405364
|
149
|
+
- 471.1315623708902
|
150
|
+
- - 591.4892493324709
|
151
|
+
- 42.631827541794564
|
152
|
+
- 2845.1654083148564
|
153
|
+
- 9.36948726124641
|
154
|
+
- - 7.071084742361592
|
155
|
+
- 45.29093411231232
|
156
|
+
- 3432.8847704374107
|
157
|
+
- 3.409183158303573
|
158
|
+
- - 91.308984085713
|
159
|
+
- 19.1536481364332
|
160
|
+
- 3373.656949880137
|
161
|
+
- 4.5363903481026
|
162
|
+
- - 809.2082973387932
|
163
|
+
- 2246.941954176211
|
164
|
+
- 61.30766021687515
|
165
|
+
- 371.19806071846244
|
166
|
+
- - 120.56476435866055
|
167
|
+
- 42.4349244403591
|
168
|
+
- 3242.1560628684038
|
169
|
+
- 83.50022078295852
|
170
|
+
- - 13.72524477409959
|
171
|
+
- 35.858220519297525
|
172
|
+
- 3332.4066864946167
|
173
|
+
- 106.66582066236779
|
174
|
+
- - 558.1188080161639
|
175
|
+
- 90.0084504200356
|
176
|
+
- 2694.854973210736
|
177
|
+
- 145.67374080342415
|
178
|
+
- - 264.0088462230318
|
179
|
+
- 254.7175868081866
|
180
|
+
- 2796.88087480315
|
181
|
+
- 173.0486646159857
|
182
|
+
- - 519.46013914282
|
183
|
+
- 1874.9349086474765
|
184
|
+
- 654.5411208373813
|
185
|
+
- 439.7198038226514
|
186
|
+
modifiable: true
|
184
187
|
parameters: !ruby/object:OpenStruct
|
185
188
|
table: {}
|