macroape 3.3.7 → 3.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. data/README.md +2 -2
  2. data/Rakefile.rb +6 -6
  3. data/TODO.txt +23 -3
  4. data/benchmark/similarity_benchmark.rb +18 -18
  5. data/lib/macroape/aligned_pair_intersection.rb +4 -4
  6. data/lib/macroape/cli/align_motifs.rb +34 -28
  7. data/lib/macroape/cli/eval_alignment.rb +73 -47
  8. data/lib/macroape/cli/eval_similarity.rb +65 -40
  9. data/lib/macroape/cli/find_pvalue.rb +30 -34
  10. data/lib/macroape/cli/find_threshold.rb +52 -41
  11. data/lib/macroape/cli/preprocess_collection.rb +68 -58
  12. data/lib/macroape/cli/scan_collection.rb +89 -73
  13. data/lib/macroape/cli.rb +184 -1
  14. data/lib/macroape/counting.rb +31 -5
  15. data/lib/macroape/pwm_compare.rb +8 -2
  16. data/lib/macroape/pwm_compare_aligned.rb +15 -10
  17. data/lib/macroape/version.rb +2 -1
  18. data/macroape.gemspec +2 -1
  19. data/spec/count_distribution_spec.rb +11 -11
  20. data/test/align_motifs_test.rb +16 -4
  21. data/test/data/{AHR_si.pat → AHR_si.pwm} +0 -0
  22. data/test/data/{KLF3_f1.pat → KLF3_f1.pwm} +0 -0
  23. data/test/data/{KLF4_f2.pat → KLF4_f2.pwm} +0 -0
  24. data/test/data/KLF4_f2_scan_results_all.txt +1 -2
  25. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -2
  26. data/test/data/KLF4_f2_scan_results_precise_mode.txt +1 -2
  27. data/test/data/KLF4_f2_scan_results_weak_threshold.txt +2 -0
  28. data/test/data/{SP1_f1.pat → SP1_f1.pwm} +0 -0
  29. data/test/data/{SP1_f1_revcomp.pat → SP1_f1_revcomp.pwm} +0 -0
  30. data/test/data/collection_pcm_without_thresholds.yaml +186 -183
  31. data/test/data/collection_without_thresholds.yaml +186 -183
  32. data/test/data/{medium_motif.pat → medium_motif.pwm} +0 -0
  33. data/test/data/{short_motif.pat → short_motif.pwm} +0 -0
  34. data/test/data/test_collection/{GABPA_f1.pat → GABPA_f1.pwm} +0 -0
  35. data/test/data/test_collection/{KLF4_f2.pat → KLF4_f2.pwm} +0 -0
  36. data/test/data/test_collection/{SP1_f1.pat → SP1_f1.pwm} +0 -0
  37. data/test/data/test_collection.yaml +179 -176
  38. data/test/data/test_collection_weak.yaml +214 -0
  39. data/test/eval_alignment_test.rb +97 -21
  40. data/test/eval_similarity_test.rb +104 -26
  41. data/test/find_pvalue_test.rb +22 -9
  42. data/test/find_threshold_test.rb +76 -25
  43. data/test/preprocess_collection_test.rb +16 -21
  44. data/test/scan_collection_test.rb +26 -14
  45. data/test/test_helper.rb +96 -12
  46. metadata +44 -24
@@ -6,46 +6,46 @@ describe Bioinform::PWM do
6
6
  let :matrix_second do [[1,2,3,4],[2,3,4,5]] end
7
7
  let :pwm_first do Bioinform::PWM.new(matrix_first) end
8
8
  let :pwm_second do Bioinform::PWM.new(matrix_second) end
9
-
9
+
10
10
  context '#count_distribution_after_threshold' do
11
-
11
+
12
12
  it 'should return hash of score => count for all scores >= threshold' do
13
13
  distribution_first = pwm_first.count_distribution_after_threshold(0)
14
14
  distribution_first.keys.should == Array.product(*matrix_first).map{|score_row| score_row.inject(&:+)}
15
15
  distribution_first.values.uniq.should == [1]
16
-
16
+
17
17
  distribution_second = pwm_second.count_distribution_after_threshold(0)
18
18
  distribution_second.should == { 3=>1, 4=>2, 5=>3, 6=>4, 7=>3, 8=>2, 9=>1 }
19
-
19
+
20
20
  distribution_second = pwm_second.count_distribution_after_threshold(5)
21
21
  distribution_second.should == { 5=>3, 6=>4, 7=>3, 8=>2, 9=>1 }
22
22
  end
23
-
23
+
24
24
  it 'should use existing precalculated hash @count_distribution if it exists' do
25
25
  pwm = pwm_second;
26
26
  pwm.instance_variable_set :@count_distribution, { 3=>10, 4=>20, 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
27
-
27
+
28
28
  distribution_second = pwm.count_distribution_after_threshold(0)
29
29
  distribution_second.should == { 3=>10, 4=>20, 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
30
-
30
+
31
31
  distribution_second = pwm.count_distribution_after_threshold(5)
32
32
  distribution_second.should == { 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
33
33
  end
34
34
  end
35
-
35
+
36
36
  context '#count_distribution' do
37
37
  it 'should return hash of score => count for all available scores' do
38
38
  pwm_second.count_distribution.should == { 3=>1, 4=>2, 5=>3, 6=>4, 7=>3, 8=>2, 9=>1 }
39
39
  end
40
-
40
+
41
41
  it 'should cache calculation in @count_distribution' do
42
42
  pwm = pwm_second;
43
43
  pwm.instance_variable_set :@count_distribution, { 3=>10, 4=>20, 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
44
44
  pwm.count_distribution.should == { 3=>10, 4=>20, 5=>30, 6=>40, 7=>30, 8=>20, 9=>10 }
45
-
45
+
46
46
  pwm.instance_variable_set :@count_distribution, nil
47
47
  pwm.count_distribution.should == { 3=>1, 4=>2, 5=>3, 6=>4, 7=>3, 8=>2, 9=>1 }
48
48
  end
49
49
  end
50
-
50
+
51
51
  end
@@ -1,12 +1,24 @@
1
1
  require_relative 'test_helper'
2
2
 
3
3
  class TestAlignmotifs < Test::Unit::TestCase
4
+ def setup
5
+ @start_dir = Dir.pwd
6
+ Dir.chdir File.join(File.dirname(__FILE__), 'data')
7
+ end
8
+ def teardown
9
+ Dir.chdir(@start_dir)
10
+ end
11
+
4
12
  def test_align_motifs
5
- assert_equal "test/data/KLF4_f2.pat\t0\tdirect\ntest/data/KLF3_f1.pat\t-4\tdirect\ntest/data/SP1_f1_revcomp.pat\t-1\trevcomp\n",
6
- Helpers.align_motifs_output('test/data/KLF4_f2.pat test/data/KLF3_f1.pat test/data/SP1_f1_revcomp.pat')
13
+ assert_equal [%w[KLF4_f2.pwm 0 direct],
14
+ %w[KLF3_f1.pwm -4 direct],
15
+ %w[SP1_f1_revcomp.pwm -1 revcomp]],
16
+ Helpers.align_motifs_output('KLF4_f2.pwm KLF3_f1.pwm SP1_f1_revcomp.pwm')
7
17
  end
8
18
  def test_align_pcm_motifs
9
- assert_equal "test/data/KLF4_f2.pcm\t0\tdirect\ntest/data/KLF3_f1.pcm\t-4\tdirect\ntest/data/SP1_f1_revcomp.pcm\t-1\trevcomp\n",
10
- Helpers.align_motifs_output('--pcm test/data/KLF4_f2.pcm test/data/KLF3_f1.pcm test/data/SP1_f1_revcomp.pcm')
19
+ assert_equal [%w[KLF4_f2.pcm 0 direct],
20
+ %w[KLF3_f1.pcm -4 direct],
21
+ %w[SP1_f1_revcomp.pcm -1 revcomp]],
22
+ Helpers.align_motifs_output('--pcm KLF4_f2.pcm KLF3_f1.pcm SP1_f1_revcomp.pcm')
11
23
  end
12
24
  end
File without changes
File without changes
File without changes
@@ -1,4 +1,3 @@
1
- #pwm similarity shift overlap orientation
2
1
  KLF4_f2 1.0 0 10 direct
3
2
  SP1_f1 0.22754919499105544 -1 10 direct
4
- GABPA_f1 0.00043527658136684877 -8 5 direct
3
+ GABPA_f1 0.00043527658136684877 -8 5 direct
@@ -1,3 +1,2 @@
1
- #pwm similarity shift overlap orientation
2
1
  KLF4_f2 1.0 0 10 direct
3
- SP1_f1 0.22754919499105544 -1 10 direct
2
+ SP1_f1 0.22754919499105544 -1 10 direct
@@ -1,4 +1,3 @@
1
- #pwm similarity shift overlap orientation
2
1
  KLF4_f2 1.0 0 10 direct *
3
2
  SP1_f1 0.2420758234928527 -1 10 direct *
4
- GABPA_f1 0.00043527658136684877 -8 5 direct
3
+ GABPA_f1 0.00043527658136684877 -8 5 direct .
@@ -0,0 +1,2 @@
1
+ KLF4_f2 1.0 0 10 direct
2
+ SP1_f1 0.2580456407255705 -1 10 direct
File without changes
@@ -1,185 +1,188 @@
1
- --- &18345276 !ruby/object:Bioinform::Collection
2
- collection:
3
- - - !ruby/object:Bioinform::PCM
4
- parameters: !ruby/object:OpenStruct
5
- table:
6
- :name: GABPA_f1
7
- :tags:
8
- - *18345276
9
- :background:
10
- - 1
11
- - 1
12
- - 1
13
- - 1
14
- modifiable: true
15
- matrix:
16
- - - 615.2572649050138
17
- - 697.0698715160123
18
- - 1261.1903440712872
19
- - 176.43506582414153
20
- - - 996.4929869323321
21
- - 805.1878697364007
22
- - 693.7695793644275
23
- - 254.5021102832924
24
- - - 1106.9888035794224
25
- - 508.19444415177276
26
- - 1029.8329748714536
27
- - 104.93632371380718
28
- - - 143.7121486195701
29
- - 2086.4279160661263
30
- - 518.37507049306
31
- - 1.4374111377025893
32
- - - 362.9541452731307
33
- - 2369.473894845734
34
- - 17.23702397004065
35
- - 0.2874822275405179
36
- - - 0.0
37
- - 0.0
38
- - 2749.952546316428
39
- - 0.0
40
- - - 0.0
41
- - 0.0
42
- - 2749.952546316428
43
- - 0.0
44
- - - 2748.2567506938462
45
- - 1.695795622582083
46
- - 0.0
47
- - 0.0
48
- - - 2726.6484322711017
49
- - 1.1499289101620715
50
- - 1.1499289101620715
51
- - 21.00425622500253
52
- - - 202.05697400573305
53
- - 28.799402471063658
54
- - 2518.808687612104
55
- - 0.2874822275405179
56
- - - 172.92889618879767
57
- - 521.1240363384483
58
- - 106.38197600987633
59
- - 1949.517637779338
60
- - - 398.1679460365911
61
- - 424.20938204069563
62
- - 1706.4024212088275
63
- - 221.17279703034018
64
- - - 764.2587933951809
65
- - 675.0883944902433
66
- - 1066.5413633225007
67
- - 244.06399510852864
68
- - !ruby/object:OpenStruct
69
- table: {}
70
- - - !ruby/object:Bioinform::PCM
71
- parameters: !ruby/object:OpenStruct
72
- table:
73
- :name: KLF4_f2
74
- :tags:
75
- - *18345276
76
- :background:
77
- - 1
78
- - 1
79
- - 1
80
- - 1
81
- modifiable: true
82
- matrix:
83
- - - 1233.46088405354
84
- - 93.18173277811673
85
- - 1036.6014857092885
86
- - 1258.2948629970272
87
- - - 263.979242343185
88
- - 5.314520555872139
89
- - 3347.5949971525274
90
- - 4.650205486388122
91
- - - 76.7700780003465
92
- - 6.643150694840173
93
- - 3529.4896409394937
94
- - 8.636095903292224
95
- - - 57.86097393406657
96
- - 18.102585643439472
97
- - 3520.3342027139347
98
- - 25.24120324653207
99
- - - 518.1947904009378
100
- - 1545.9062946905135
101
- - 22.396758181071043
102
- - 1535.0411222654507
103
- - - 137.98151691820345
104
- - 9.300410972776241
105
- - 3456.320530770924
106
- - 17.936506876068467
107
- - - 115.27647661640499
108
- - 81.51802997128804
109
- - 1861.9425868567278
110
- - 1562.801872093553
111
- - - 227.8095486111286
112
- - 42.84555258785854
113
- - 3278.6396005325996
114
- - 72.244263806387
115
- - - 108.73384179997886
116
- - 134.47328134862394
117
- - 3162.880454846513
118
- - 215.45138754285665
119
- - - 238.49636899561344
120
- - 2225.9561104691043
121
- - 402.40727964384774
122
- - 754.6792064294074
123
- - !ruby/object:OpenStruct
124
- table: {}
125
- - - !ruby/object:Bioinform::PCM
126
- parameters: !ruby/object:OpenStruct
127
- table:
128
- :name: SP1_f1
129
- :tags:
130
- - *18345276
131
- :background:
132
- - 1
133
- - 1
134
- - 1
135
- - 1
136
- modifiable: true
137
- matrix:
138
- - - 682.6436366358055
139
- - 443.1455214015781
140
- - 2075.655346294993
141
- - 287.211468117951
142
- - - 299.8883246804867
143
- - 103.74338315843572
144
- - 2613.8927022405364
145
- - 471.1315623708902
146
- - - 591.4892493324709
147
- - 42.631827541794564
148
- - 2845.1654083148564
149
- - 9.36948726124641
150
- - - 7.071084742361592
151
- - 45.29093411231232
152
- - 3432.8847704374107
153
- - 3.409183158303573
154
- - - 91.308984085713
155
- - 19.1536481364332
156
- - 3373.656949880137
157
- - 4.5363903481026
158
- - - 809.2082973387932
159
- - 2246.941954176211
160
- - 61.30766021687515
161
- - 371.19806071846244
162
- - - 120.56476435866055
163
- - 42.4349244403591
164
- - 3242.1560628684038
165
- - 83.50022078295852
166
- - - 13.72524477409959
167
- - 35.858220519297525
168
- - 3332.4066864946167
169
- - 106.66582066236779
170
- - - 558.1188080161639
171
- - 90.0084504200356
172
- - 2694.854973210736
173
- - 145.67374080342415
174
- - - 264.0088462230318
175
- - 254.7175868081866
176
- - 2796.88087480315
177
- - 173.0486646159857
178
- - - 519.46013914282
179
- - 1874.9349086474765
180
- - 654.5411208373813
181
- - 439.7198038226514
182
- - !ruby/object:OpenStruct
183
- table: {}
1
+ --- !ruby/object:Bioinform::Collection
2
+ container:
3
+ - !ruby/object:Bioinform::Motif
4
+ parameters: !ruby/object:OpenStruct
5
+ table:
6
+ :original_data_model: :pcm
7
+ :pcm: !ruby/object:Bioinform::PCM
8
+ parameters: !ruby/object:OpenStruct
9
+ table:
10
+ :name: GABPA_f1
11
+ :background:
12
+ - 1
13
+ - 1
14
+ - 1
15
+ - 1
16
+ modifiable: true
17
+ matrix:
18
+ - - 615.2572649050138
19
+ - 697.0698715160123
20
+ - 1261.1903440712872
21
+ - 176.43506582414153
22
+ - - 996.4929869323321
23
+ - 805.1878697364007
24
+ - 693.7695793644275
25
+ - 254.5021102832924
26
+ - - 1106.9888035794224
27
+ - 508.19444415177276
28
+ - 1029.8329748714536
29
+ - 104.93632371380718
30
+ - - 143.7121486195701
31
+ - 2086.4279160661263
32
+ - 518.37507049306
33
+ - 1.4374111377025893
34
+ - - 362.9541452731307
35
+ - 2369.473894845734
36
+ - 17.23702397004065
37
+ - 0.2874822275405179
38
+ - - 0.0
39
+ - 0.0
40
+ - 2749.952546316428
41
+ - 0.0
42
+ - - 0.0
43
+ - 0.0
44
+ - 2749.952546316428
45
+ - 0.0
46
+ - - 2748.2567506938462
47
+ - 1.695795622582083
48
+ - 0.0
49
+ - 0.0
50
+ - - 2726.6484322711017
51
+ - 1.1499289101620715
52
+ - 1.1499289101620715
53
+ - 21.00425622500253
54
+ - - 202.05697400573305
55
+ - 28.799402471063658
56
+ - 2518.808687612104
57
+ - 0.2874822275405179
58
+ - - 172.92889618879767
59
+ - 521.1240363384483
60
+ - 106.38197600987633
61
+ - 1949.517637779338
62
+ - - 398.1679460365911
63
+ - 424.20938204069563
64
+ - 1706.4024212088275
65
+ - 221.17279703034018
66
+ - - 764.2587933951809
67
+ - 675.0883944902433
68
+ - 1066.5413633225007
69
+ - 244.06399510852864
70
+ modifiable: true
71
+ - !ruby/object:Bioinform::Motif
72
+ parameters: !ruby/object:OpenStruct
73
+ table:
74
+ :original_data_model: :pcm
75
+ :pcm: !ruby/object:Bioinform::PCM
76
+ parameters: !ruby/object:OpenStruct
77
+ table:
78
+ :name: KLF4_f2
79
+ :background:
80
+ - 1
81
+ - 1
82
+ - 1
83
+ - 1
84
+ modifiable: true
85
+ matrix:
86
+ - - 1233.46088405354
87
+ - 93.18173277811673
88
+ - 1036.6014857092885
89
+ - 1258.2948629970272
90
+ - - 263.979242343185
91
+ - 5.314520555872139
92
+ - 3347.5949971525274
93
+ - 4.650205486388122
94
+ - - 76.7700780003465
95
+ - 6.643150694840173
96
+ - 3529.4896409394937
97
+ - 8.636095903292224
98
+ - - 57.86097393406657
99
+ - 18.102585643439472
100
+ - 3520.3342027139347
101
+ - 25.24120324653207
102
+ - - 518.1947904009378
103
+ - 1545.9062946905135
104
+ - 22.396758181071043
105
+ - 1535.0411222654507
106
+ - - 137.98151691820345
107
+ - 9.300410972776241
108
+ - 3456.320530770924
109
+ - 17.936506876068467
110
+ - - 115.27647661640499
111
+ - 81.51802997128804
112
+ - 1861.9425868567278
113
+ - 1562.801872093553
114
+ - - 227.8095486111286
115
+ - 42.84555258785854
116
+ - 3278.6396005325996
117
+ - 72.244263806387
118
+ - - 108.73384179997886
119
+ - 134.47328134862394
120
+ - 3162.880454846513
121
+ - 215.45138754285665
122
+ - - 238.49636899561344
123
+ - 2225.9561104691043
124
+ - 402.40727964384774
125
+ - 754.6792064294074
126
+ modifiable: true
127
+ - !ruby/object:Bioinform::Motif
128
+ parameters: !ruby/object:OpenStruct
129
+ table:
130
+ :original_data_model: :pcm
131
+ :pcm: !ruby/object:Bioinform::PCM
132
+ parameters: !ruby/object:OpenStruct
133
+ table:
134
+ :name: SP1_f1
135
+ :background:
136
+ - 1
137
+ - 1
138
+ - 1
139
+ - 1
140
+ modifiable: true
141
+ matrix:
142
+ - - 682.6436366358055
143
+ - 443.1455214015781
144
+ - 2075.655346294993
145
+ - 287.211468117951
146
+ - - 299.8883246804867
147
+ - 103.74338315843572
148
+ - 2613.8927022405364
149
+ - 471.1315623708902
150
+ - - 591.4892493324709
151
+ - 42.631827541794564
152
+ - 2845.1654083148564
153
+ - 9.36948726124641
154
+ - - 7.071084742361592
155
+ - 45.29093411231232
156
+ - 3432.8847704374107
157
+ - 3.409183158303573
158
+ - - 91.308984085713
159
+ - 19.1536481364332
160
+ - 3373.656949880137
161
+ - 4.5363903481026
162
+ - - 809.2082973387932
163
+ - 2246.941954176211
164
+ - 61.30766021687515
165
+ - 371.19806071846244
166
+ - - 120.56476435866055
167
+ - 42.4349244403591
168
+ - 3242.1560628684038
169
+ - 83.50022078295852
170
+ - - 13.72524477409959
171
+ - 35.858220519297525
172
+ - 3332.4066864946167
173
+ - 106.66582066236779
174
+ - - 558.1188080161639
175
+ - 90.0084504200356
176
+ - 2694.854973210736
177
+ - 145.67374080342415
178
+ - - 264.0088462230318
179
+ - 254.7175868081866
180
+ - 2796.88087480315
181
+ - 173.0486646159857
182
+ - - 519.46013914282
183
+ - 1874.9349086474765
184
+ - 654.5411208373813
185
+ - 439.7198038226514
186
+ modifiable: true
184
187
  parameters: !ruby/object:OpenStruct
185
188
  table: {}