macroape 3.3.7 → 3.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. data/README.md +2 -2
  2. data/Rakefile.rb +6 -6
  3. data/TODO.txt +23 -3
  4. data/benchmark/similarity_benchmark.rb +18 -18
  5. data/lib/macroape/aligned_pair_intersection.rb +4 -4
  6. data/lib/macroape/cli/align_motifs.rb +34 -28
  7. data/lib/macroape/cli/eval_alignment.rb +73 -47
  8. data/lib/macroape/cli/eval_similarity.rb +65 -40
  9. data/lib/macroape/cli/find_pvalue.rb +30 -34
  10. data/lib/macroape/cli/find_threshold.rb +52 -41
  11. data/lib/macroape/cli/preprocess_collection.rb +68 -58
  12. data/lib/macroape/cli/scan_collection.rb +89 -73
  13. data/lib/macroape/cli.rb +184 -1
  14. data/lib/macroape/counting.rb +31 -5
  15. data/lib/macroape/pwm_compare.rb +8 -2
  16. data/lib/macroape/pwm_compare_aligned.rb +15 -10
  17. data/lib/macroape/version.rb +2 -1
  18. data/macroape.gemspec +2 -1
  19. data/spec/count_distribution_spec.rb +11 -11
  20. data/test/align_motifs_test.rb +16 -4
  21. data/test/data/{AHR_si.pat → AHR_si.pwm} +0 -0
  22. data/test/data/{KLF3_f1.pat → KLF3_f1.pwm} +0 -0
  23. data/test/data/{KLF4_f2.pat → KLF4_f2.pwm} +0 -0
  24. data/test/data/KLF4_f2_scan_results_all.txt +1 -2
  25. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -2
  26. data/test/data/KLF4_f2_scan_results_precise_mode.txt +1 -2
  27. data/test/data/KLF4_f2_scan_results_weak_threshold.txt +2 -0
  28. data/test/data/{SP1_f1.pat → SP1_f1.pwm} +0 -0
  29. data/test/data/{SP1_f1_revcomp.pat → SP1_f1_revcomp.pwm} +0 -0
  30. data/test/data/collection_pcm_without_thresholds.yaml +186 -183
  31. data/test/data/collection_without_thresholds.yaml +186 -183
  32. data/test/data/{medium_motif.pat → medium_motif.pwm} +0 -0
  33. data/test/data/{short_motif.pat → short_motif.pwm} +0 -0
  34. data/test/data/test_collection/{GABPA_f1.pat → GABPA_f1.pwm} +0 -0
  35. data/test/data/test_collection/{KLF4_f2.pat → KLF4_f2.pwm} +0 -0
  36. data/test/data/test_collection/{SP1_f1.pat → SP1_f1.pwm} +0 -0
  37. data/test/data/test_collection.yaml +179 -176
  38. data/test/data/test_collection_weak.yaml +214 -0
  39. data/test/eval_alignment_test.rb +97 -21
  40. data/test/eval_similarity_test.rb +104 -26
  41. data/test/find_pvalue_test.rb +22 -9
  42. data/test/find_threshold_test.rb +76 -25
  43. data/test/preprocess_collection_test.rb +16 -21
  44. data/test/scan_collection_test.rb +26 -14
  45. data/test/test_helper.rb +96 -12
  46. metadata +44 -24
@@ -1,35 +1,111 @@
1
1
  require_relative 'test_helper'
2
2
 
3
3
  class TestEvalAlignment < Test::Unit::TestCase
4
- def test_process_pcm_files
5
- assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_alignment_output('test/data/KLF4_f2.pcm test/data/SP1_f1.pcm -1 direct --pcm')
4
+ include Helpers
5
+ def setup
6
+ @start_dir = Dir.pwd
7
+ Dir.chdir File.join(File.dirname(__FILE__), 'data')
8
+ end
9
+ def teardown
10
+ Dir.chdir(@start_dir)
6
11
  end
7
12
 
8
- def test_process_at_optimal_alignment
9
- assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_alignment_output('test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 direct')
13
+ def test_process_weak_threshold
14
+ assert_similarity_info_output({similarity: 0.24382446963092125,
15
+ distance: 0.7561755303690787,
16
+ length: 11,
17
+ shift: -1,
18
+ orientation: 'direct',
19
+ words_recognized_by_both: 839.0,
20
+ threshold_first: 5.8,
21
+ words_recognized_by_first: 2104.0,
22
+ pvalue_recognized_by_first: 0.0005016326904296875,
23
+ threshold_second: 5.6,
24
+ words_recognized_by_second: 2176.0,
25
+ pvalue_recognized_by_second: 0.000518798828125,
26
+ matrix_first_alignment: '.>>>>>>>>>>',
27
+ matrix_second_alignment: '>>>>>>>>>>>' #, discretization: 10.0
28
+ },
29
+ Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm -1 direct'))
30
+ end
31
+
32
+
33
+ def test_process_strong_threshold
34
+ assert_similarity_info_output({similarity: 0.2420758234928527,
35
+ distance: 0.7579241765071473,
36
+ length: 11,
37
+ shift: -1,
38
+ orientation: 'direct',
39
+ words_recognized_by_both: 779.0,
40
+ threshold_first: 5.8100000000000005,
41
+ words_recognized_by_first: 1964.0,
42
+ pvalue_recognized_by_first: 0.00046825408935546875,
43
+ threshold_second: 5.61,
44
+ words_recognized_by_second: 2033.0,
45
+ pvalue_recognized_by_second: 0.00048470497131347656,
46
+ matrix_first_alignment: '.>>>>>>>>>>',
47
+ matrix_second_alignment: '>>>>>>>>>>>' #, discretization: 10.0
48
+ },
49
+ Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm -1 direct --boundary lower'))
50
+ end
51
+
52
+ def test_process_custom_thresholds
53
+ assert_similarity_info_output({similarity: 0.28505023241865346,
54
+ words_recognized_by_both: 1901.0,
55
+ words_recognized_by_first: 4348.0,
56
+ words_recognized_by_second: 4222.0,
57
+ # threshold_first: 4.7,
58
+ # threshold_second: 4.6
59
+ },
60
+ Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm -1 direct --first-threshold 4.7 --second-threshold 4.6'))
10
61
  end
11
62
  def test_process_not_optimal_alignment
12
- assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n", Helpers.eval_alignment_output('test/data/KLF4_f2.pat test/data/SP1_f1.pat 0 direct')
63
+ assert_similarity_info_output({similarity: 0.004517983923018248,
64
+ length: 12,
65
+ words_recognized_by_both: 77.0,
66
+ words_recognized_by_first: 8416.0,
67
+ words_recognized_by_second: 8704.0,
68
+ matrix_first_alignment: '>>>>>>>>>>..',
69
+ matrix_second_alignment: '.>>>>>>>>>>>',
70
+ shift: 1,
71
+ orientation: 'direct'},
72
+ Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm 1 direct'))
73
+ end
74
+
75
+ def test_process_at_optimal_alignment_reversed
76
+ assert_similarity_info_output({similarity: 0.0,
77
+ words_recognized_by_both: 0.0,
78
+ length: 11,
79
+ matrix_first_alignment: '.>>>>>>>>>>',
80
+ matrix_second_alignment:'<<<<<<<<<<<',
81
+ shift: -1,
82
+ orientation: 'revcomp'},
83
+ Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm -1 revcomp'))
13
84
  end
85
+
86
+ def test_process_pcm_files
87
+ assert_equal( Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm -1 direct'),
88
+ Helpers.eval_alignment_output('KLF4_f2.pcm SP1_f1.pcm -1 direct --pcm'))
89
+ end
90
+
14
91
  def test_process_alignment_first_motif_from_stdin
15
- assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n",
16
- Helpers.provide_stdin(File.read('test/data/KLF4_f2.pat')) {
17
- Helpers.eval_alignment_output('.stdin test/data/SP1_f1.pat 0 direct')
18
- }
92
+ result = Helpers.provide_stdin(File.read('KLF4_f2.pwm')) {
93
+ Helpers.eval_alignment_output('.stdin SP1_f1.pwm 0 direct') }
94
+ assert_equal( Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm 0 direct'),
95
+ result )
19
96
  end
97
+
20
98
  def test_process_alignment_second_motif_from_stdin
21
- assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n",
22
- Helpers.provide_stdin(File.read('test/data/SP1_f1.pat')) {
23
- Helpers.eval_alignment_output('test/data/KLF4_f2.pat .stdin 0 direct')
24
- }
99
+ result = Helpers.provide_stdin(File.read('SP1_f1.pwm')) {
100
+ Helpers.eval_alignment_output('KLF4_f2.pwm .stdin 0 direct') }
101
+ assert_equal( Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm 0 direct'),
102
+ result )
25
103
  end
104
+
26
105
  def test_process_alignment_both_motifs_from_stdin
27
- assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n",
28
- Helpers.provide_stdin(File.read('test/data/KLF4_f2.pat') + File.read('test/data/SP1_f1.pat')) {
29
- Helpers.eval_alignment_output('.stdin .stdin 0 direct')
30
- }
31
- end
32
- def test_process_at_optimal_alignment_reversed
33
- assert_equal "0.0\n0.0\t11\n.>>>>>>>>>>\n<<<<<<<<<<<\n-1\trevcomp\n", Helpers.eval_alignment_output('test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 revcomp')
106
+ result = Helpers.provide_stdin(File.read('KLF4_f2.pwm') + File.read('SP1_f1.pwm')) {
107
+ Helpers.eval_alignment_output('.stdin .stdin 0 direct') }
108
+ assert_equal( Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm 0 direct'),
109
+ result )
34
110
  end
35
- end
111
+ end
@@ -1,45 +1,123 @@
1
1
  require_relative 'test_helper'
2
2
 
3
3
  class TestEvalSimilarity < Test::Unit::TestCase
4
- def test_process_pair_of_pcms
5
- assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_similarity_output('test/data/KLF4_f2.pcm test/data/SP1_f1.pcm --pcm')
4
+ include Helpers
5
+ def setup
6
+ @start_dir = Dir.pwd
7
+ Dir.chdir File.join(File.dirname(__FILE__), 'data')
6
8
  end
7
- def test_process_pair_of_pwms
8
- assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_similarity_output('test/data/KLF4_f2.pat test/data/SP1_f1.pat')
9
+ def teardown
10
+ Dir.chdir(@start_dir)
9
11
  end
10
- def test_process_another_pair_of_pwms
11
- assert_equal "0.0037332005973120955\n15.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>.\n1\tdirect\n", Helpers.eval_similarity_output('test/data/SP1_f1.pat test/data/AHR_si.pat')
12
+
13
+ def test_process_strong_thresholds
14
+ assert_similarity_info_output({similarity: 0.2420758234928527,
15
+ distance: 0.7579241765071473,
16
+ length: 11,
17
+ shift: -1,
18
+ orientation: 'direct',
19
+ words_recognized_by_both: 779.0,
20
+ threshold_first: 5.8100000000000005,
21
+ words_recognized_by_first: 1964.0,
22
+ pvalue_recognized_by_first: 0.00046825408935546875,
23
+ threshold_second: 5.61,
24
+ words_recognized_by_second: 2033.0,
25
+ pvalue_recognized_by_second: 0.00048470497131347656,
26
+ matrix_first_alignment: '.>>>>>>>>>>',
27
+ matrix_second_alignment: '>>>>>>>>>>>' #, discretization: 10.0
28
+ },
29
+ Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm --boundary lower'))
30
+ end
31
+
32
+ def test_process_weak_thresholds
33
+ assert_similarity_info_output({similarity: 0.24382446963092125,
34
+ distance: 0.7561755303690787,
35
+ length: 11,
36
+ shift: -1,
37
+ orientation: 'direct',
38
+ words_recognized_by_both: 839.0,
39
+ threshold_first: 5.8,
40
+ words_recognized_by_first: 2104.0,
41
+ pvalue_recognized_by_first: 0.0005016326904296875,
42
+ threshold_second: 5.6,
43
+ words_recognized_by_second: 2176.0,
44
+ pvalue_recognized_by_second: 0.000518798828125,
45
+ matrix_first_alignment: '.>>>>>>>>>>',
46
+ matrix_second_alignment: '>>>>>>>>>>>' #, discretization: 10.0
47
+ },
48
+ Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm'))
49
+ end
50
+ def test_process_custom_threshold
51
+ assert_similarity_info_output({similarity: 0.28505023241865346,
52
+ words_recognized_by_both: 1901.0,
53
+ words_recognized_by_first: 4348.0,
54
+ words_recognized_by_second: 4222.0,
55
+ # threshold_first: 4.7,
56
+ # threshold_second: 4.6
57
+ },
58
+ Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm --first-threshold 4.7 --second-threshold 4.6'))
12
59
  end
13
-
60
+
61
+ def test_process_dissimilar_pair_of_pwms
62
+ assert_similarity_info_output({similarity: 0.0037332005973120955,
63
+ words_recognized_by_both: 15.0,
64
+ words_recognized_by_first: 2033.0,
65
+ words_recognized_by_second: 2000.0,
66
+ length: 11,
67
+ matrix_first_alignment: '>>>>>>>>>>>',
68
+ matrix_second_alignment: '.>>>>>>>>>.',
69
+ shift: 1,
70
+ orientation: 'direct'},
71
+ Helpers.eval_similarity_output('SP1_f1.pwm AHR_si.pwm --boundary lower'))
72
+ end
73
+
14
74
  def test_recognize_orientation_of_alignment
15
- assert_equal "1.0\n2033.0\t11\n>>>>>>>>>>>\n<<<<<<<<<<<\n0\trevcomp\n", Helpers.eval_similarity_output('test/data/SP1_f1_revcomp.pat test/data/SP1_f1.pat')
75
+ assert_similarity_info_output({similarity: 1.0,
76
+ words_recognized_by_both: 2176.0,
77
+ words_recognized_by_first: 2176.0,
78
+ words_recognized_by_second: 2176.0,
79
+ length: 11,
80
+ matrix_first_alignment: '>>>>>>>>>>>',
81
+ matrix_second_alignment: '<<<<<<<<<<<',
82
+ shift: 0,
83
+ orientation: 'revcomp'},
84
+ Helpers.eval_similarity_output('SP1_f1_revcomp.pwm SP1_f1.pwm'))
16
85
  end
17
86
 
18
87
  def test_process_custom_discretization
19
- assert_equal "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n", Helpers.eval_similarity_output('test/data/SP1_f1.pat test/data/KLF4_f2.pat -d 1')
88
+ assert_similarity_info_output({similarity: 0.2580456407255705,
89
+ words_recognized_by_both: 1323.0,
90
+ words_recognized_by_first: 3554.0,
91
+ words_recognized_by_second: 2896.0,
92
+ length: 11,
93
+ matrix_first_alignment: '>>>>>>>>>>>',
94
+ matrix_second_alignment: '.>>>>>>>>>>',
95
+ shift: 1,
96
+ orientation: 'direct' #, discretization: 1.0
97
+ },
98
+ Helpers.eval_similarity_output('SP1_f1.pwm KLF4_f2.pwm -d 1'))
20
99
  end
21
-
100
+
101
+ def test_process_pcm_files
102
+ assert_equal( Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm'),
103
+ Helpers.eval_similarity_output('KLF4_f2.pcm SP1_f1.pcm --pcm'))
104
+ end
105
+
22
106
  def test_process_first_motif_from_stdin
23
- assert_equal "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n",
24
- Helpers.provide_stdin(File.read('test/data/SP1_f1.pat')){
25
- Helpers.eval_similarity_output('.stdin test/data/KLF4_f2.pat -d 1')
26
- }
107
+ result = Helpers.provide_stdin(File.read('KLF4_f2.pwm')){
108
+ Helpers.eval_similarity_output('.stdin SP1_f1.pwm') }
109
+ assert_equal(Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm'), result)
27
110
  end
28
-
111
+
29
112
  def test_process_second_motif_from_stdin
30
- assert_equal "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n",
31
- Helpers.provide_stdin(File.read('test/data/KLF4_f2.pat')){
32
- Helpers.eval_similarity_output('test/data/SP1_f1.pat .stdin -d 1')
33
- }
113
+ result = Helpers.provide_stdin(File.read('SP1_f1.pwm')){
114
+ Helpers.eval_similarity_output('KLF4_f2.pwm .stdin') }
115
+ assert_equal(Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm'), result)
34
116
  end
35
117
 
36
118
  def test_process_both_motifs_from_stdin
37
- assert_equal "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n",
38
- Helpers.provide_stdin(File.read('test/data/SP1_f1.pat') + File.read('test/data/KLF4_f2.pat')){
39
- Helpers.eval_similarity_output('.stdin .stdin -d 1')
40
- }
119
+ result = Helpers.provide_stdin(File.read('KLF4_f2.pwm') + File.read('SP1_f1.pwm')){
120
+ Helpers.eval_similarity_output('.stdin .stdin') }
121
+ assert_equal(Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm'), result)
41
122
  end
42
-
43
-
44
-
45
123
  end
@@ -1,26 +1,39 @@
1
1
  require_relative 'test_helper'
2
2
 
3
3
  class FindPvalueTest < Test::Unit::TestCase
4
+ def setup
5
+ @start_dir = Dir.pwd
6
+ Dir.chdir File.join(File.dirname(__FILE__), 'data')
7
+ end
8
+ def teardown
9
+ Dir.chdir(@start_dir)
10
+ end
4
11
  def test_process_pcm
5
- assert_equal "4.1719\t1048.0\t0.00099945068359375\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pcm 4.1719 --pcm')
12
+ assert_equal [%w[4.1719 1048.0 0.00099945068359375]], Helpers.find_pvalue_output('KLF4_f2.pcm 4.1719 --pcm')
6
13
  end
7
14
  def test_process_one_threshold
8
- assert_equal "4.1719\t1048.0\t0.00099945068359375\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pat 4.1719')
15
+ assert_equal [%w[4.1719 1048.0 0.00099945068359375]], Helpers.find_pvalue_output('KLF4_f2.pwm 4.1719')
9
16
  end
10
17
  def test_process_several_thresholds
11
- assert_equal "4.1719\t1048.0\t0.00099945068359375\n5.2403\t524.0\t0.000499725341796875\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pat 4.1719 5.2403')
18
+ assert_equal [%w[4.1719 1048.0 0.00099945068359375],
19
+ %w[5.2403 524.0 0.000499725341796875]], Helpers.find_pvalue_output('KLF4_f2.pwm 4.1719 5.2403')
12
20
  end
13
21
  def test_process_several_thresholds_result_is_ordered
14
- assert_equal "5.2403\t524.0\t0.000499725341796875\n4.1719\t1048.0\t0.00099945068359375\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pat 5.2403 4.1719')
22
+ assert_equal [%w[5.2403 524.0 0.000499725341796875],
23
+ %w[4.1719 1048.0 0.00099945068359375]], Helpers.find_pvalue_output('KLF4_f2.pwm 5.2403 4.1719')
15
24
  end
16
25
  def test_custom_discretization
17
- assert_equal "5.2403\t527.0\t0.0005025863647460938\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pat 5.2403 -d 100')
26
+ assert_equal [%w[5.2403 527.0 0.0005025863647460938]], Helpers.find_pvalue_output('KLF4_f2.pwm 5.2403 -d 100')
27
+ end
28
+ def test_probability_wise_backgrond
29
+ assert_equal [%w[5.2403 0.0005025863647460938]], Helpers.find_pvalue_output('KLF4_f2.pwm 5.2403 -d 100 -b 0.25,0.25,0.25,0.25')
30
+ end
31
+ def test_custom_background
32
+ assert_equal [%w[5.2403 6.815000000000001e-06]], Helpers.find_pvalue_output('KLF4_f2.pwm 5.2403 -b 0.4,0.1,0.1,0.4')
18
33
  end
19
34
  def test_process_pwm_from_stdin
20
- assert_equal Helpers.find_pvalue_output('test/data/KLF4_f2.pat 1'),
21
- Helpers.provide_stdin(File.read('test/data/KLF4_f2.pat')) {
22
- Helpers.find_pvalue_output('.stdin 1')
23
- }
35
+ assert_equal Helpers.find_pvalue_output('KLF4_f2.pwm 1'),
36
+ Helpers.provide_stdin(File.read 'KLF4_f2.pwm'){ Helpers.find_pvalue_output('.stdin 1') }
24
37
  end
25
38
  end
26
39
 
@@ -1,40 +1,91 @@
1
1
  require_relative 'test_helper'
2
2
 
3
3
  class FindThresholdTest < Test::Unit::TestCase
4
+ include Helpers
5
+ def setup
6
+ @start_dir = Dir.pwd
7
+ Dir.chdir File.join(File.dirname(__FILE__), 'data')
8
+ end
9
+ def teardown
10
+ Dir.chdir(@start_dir)
11
+ end
12
+
13
+ def test_process_one_pvalue_weak_thresold
14
+ assert_threshold_info_output({requested_pvalue: 0.001,
15
+ real_pvalue: 0.0010004043579101562,
16
+ number_of_recognized_words: 1049.0,
17
+ threshold: 4.1718},
18
+ Helpers.find_threshold_output("KLF4_f2.pwm 0.001 --boundary upper") )
19
+ # additional consistency checks
20
+ assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.1718"), '0.0010004043579101562'
21
+ end
22
+
23
+ def test_process_one_pvalue_strong_thresold
24
+ assert_threshold_info_output({requested_pvalue: 0.001,
25
+ real_pvalue: 0.00099945068359375,
26
+ number_of_recognized_words: 1048.0,
27
+ threshold: 4.17189},
28
+ Helpers.find_threshold_output("KLF4_f2.pwm 0.001") )
29
+ # additional consistency checks
30
+ assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.17189"), '0.00099945068359375'
31
+ end
32
+
4
33
  def test_process_several_pvalues
5
34
  pvalues = []
6
- Helpers.find_threshold_output('test/data/KLF4_f2.pat -p 0.001 0.0005').lines.each{|line|
7
- pvalue, threshold, real_pvalue = line.strip.split("\t")
8
- pvalues << pvalue
9
- assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
10
- }
11
- assert_equal pvalues, ['0.0005', '0.001']
35
+ assert_threshold_info_output({requested_pvalue: 0.0005,
36
+ real_pvalue: 0.000499725341796875,
37
+ number_of_recognized_words: 524.0,
38
+ threshold: 5.24071},
39
+ {requested_pvalue: 0.001,
40
+ real_pvalue: 0.00099945068359375,
41
+ number_of_recognized_words: 1048.0,
42
+ threshold: 4.17189},
43
+ Helpers.find_threshold_output('KLF4_f2.pwm 0.001 0.0005') )
44
+ assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.17189"), '0.00099945068359375'
45
+ assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 5.24071"), '0.000499725341796875'
12
46
  end
47
+
13
48
  def test_process_pcm
14
- pvalue, threshold, real_pvalue = Helpers.find_threshold_output('test/data/KLF4_f2.pcm -p 0.001 --pcm').strip.split("\t")
15
- assert_equal '0.001', pvalue
16
- assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
17
- end
18
- def test_process_one_pvalue
19
- pvalue, threshold, real_pvalue = Helpers.find_threshold_output('test/data/KLF4_f2.pat -p 0.001').strip.split("\t")
20
- assert_equal '0.001', pvalue
21
- assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
49
+ assert_equal( Helpers.find_threshold_output("KLF4_f2.pwm"),
50
+ Helpers.find_threshold_output("KLF4_f2.pcm --pcm"))
22
51
  end
52
+
23
53
  def test_process_default_pvalue
24
- pvalue, threshold, real_pvalue = Helpers.find_threshold_output('test/data/KLF4_f2.pat').strip.split("\t")
25
- assert_equal '0.0005', pvalue
26
- assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
54
+ assert_equal( Helpers.find_threshold_output("KLF4_f2.pwm 0.0005"),
55
+ Helpers.find_threshold_output("KLF4_f2.pwm"))
27
56
  end
28
57
  def test_custom_discretization
29
- pvalue, threshold, real_pvalue = Helpers.find_threshold_output('test/data/KLF4_f2.pat -d 100').strip.split("\t")
30
- assert_equal '0.0005', pvalue
31
- assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold} -d 100"), real_pvalue
58
+ assert_threshold_info_output({requested_pvalue: 0.0005,
59
+ real_pvalue: 0.0004978179931640625,
60
+ number_of_recognized_words: 522.0,
61
+ threshold: 5.281000000000001},
62
+ Helpers.find_threshold_output("KLF4_f2.pwm -d 100") )
63
+ # additional consistency checks
64
+ assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 5.281000000000001 -d 100"), '0.0004978179931640625'
65
+ end
66
+ def test_custom_background
67
+ assert_threshold_info_output({requested_pvalue: 0.0005,
68
+ real_pvalue: '0.00049964290000001',
69
+ threshold: '-0.10449000000000001'},
70
+ Helpers.find_threshold_output("KLF4_f2.pwm -b 0.4,0.1,0.1,0.4") )
71
+ # additional consistency checks
72
+ assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm -0.10449000000000001 -b 0.4,0.1,0.1,0.4"), '0.0004996429000000166' # here real pvalue differs at last digits =\
32
73
  end
33
74
  def test_process_pwm_from_stdin
34
- assert_equal Helpers.find_threshold_output('test/data/KLF4_f2.pat'),
35
- Helpers.provide_stdin(File.read('test/data/KLF4_f2.pat')) {
36
- Helpers.find_threshold_output('.stdin')
37
- }
75
+ assert_equal Helpers.find_threshold_output('KLF4_f2.pwm'),
76
+ Helpers.provide_stdin(File.read('KLF4_f2.pwm')){ Helpers.find_threshold_output('.stdin') }
38
77
  end
39
- end
40
78
 
79
+ # TODO: rewrite this as a spec for count_distribution_under_pvalue: it should not raise an error (log out of domain) and should return a value
80
+ def test_process_large_pvalue
81
+ assert_nothing_raised do
82
+ # discretization is set explicitly so that the calculation does not take too long
83
+ assert_threshold_info_output({requested_pvalue: 0.8,
84
+ real_pvalue: 0.7996518611907959,
85
+ number_of_recognized_words: 3353983.0,
86
+ threshold: -17.89},
87
+ Helpers.find_threshold_output('SP1_f1.pwm 0.8 -d 10') )
88
+ end
89
+ assert_equal Helpers.obtain_pvalue_by_threshold("SP1_f1.pwm -17.89 -d 10"), '0.7996518611907959'
90
+ end
91
+ end
@@ -1,7 +1,7 @@
1
1
  require_relative 'test_helper'
2
2
  require 'yaml'
3
3
 
4
- # Don't use YAML.load_file() instead of YAML.load(File.read()) because in ruby before v1.93 p194
4
+ # Don't use YAML.load_file() instead of YAML.load(File.read()) because in ruby before v1.9.3 p194
5
5
  # it doesn't immediately release the file descriptor (if I understood the error correctly), so File.delete fails
6
6
 
7
7
  class TestPreprocessCollection < Test::Unit::TestCase
@@ -11,60 +11,55 @@ class TestPreprocessCollection < Test::Unit::TestCase
11
11
  end
12
12
  def teardown
13
13
  File.delete('test_collection.yaml.tmp') if File.exist? 'test_collection.yaml.tmp'
14
+ File.delete('my_collection.yaml') if File.exist? 'my_collection.yaml'
14
15
  Dir.chdir(@start_dir)
15
16
  end
16
17
 
18
+ def test_weak_thresholds
19
+ Helpers.run_preprocess_collection('test_collection test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent')
20
+ assert_equal YAML.load(File.read('test_collection_weak.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
21
+ end
22
+
17
23
  def test_multipvalue_preprocessing
18
- Helpers.run_preprocess_collection('test_collection -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent')
24
+ Helpers.run_preprocess_collection('test_collection test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
19
25
  assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
20
26
  end
21
27
 
22
28
  def test_preprocessing_collection_from_a_single_file
23
- Helpers.run_preprocess_collection('test_collection_single_file.txt -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent')
29
+ Helpers.run_preprocess_collection('test_collection_single_file.txt test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
24
30
  assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
25
31
  end
26
32
 
27
33
  def test_preprocessing_collection_from_stdin
28
- Helpers.provide_stdin('test_collection/GABPA_f1.pat test_collection/KLF4_f2.pat test_collection/SP1_f1.pat'){
29
- Helpers.run_preprocess_collection('.stdin -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent')
34
+ Helpers.provide_stdin('test_collection/GABPA_f1.pwm test_collection/KLF4_f2.pwm test_collection/SP1_f1.pwm'){
35
+ Helpers.run_preprocess_collection('.stdin test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
30
36
  }
31
37
  assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
32
38
  end
33
39
 
34
40
  def test_preprocessing_folder_pcm
35
- Helpers.run_preprocess_collection('test_collection_pcm -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent --pcm')
41
+ Helpers.run_preprocess_collection('test_collection_pcm test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
36
42
  assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
37
43
  end
38
44
 
39
45
  def test_preprocessing_collection_from_a_single_file_pcm
40
- Helpers.run_preprocess_collection('test_collection_single_file_pcm.txt -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent --pcm')
46
+ Helpers.run_preprocess_collection('test_collection_single_file_pcm.txt test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
41
47
  assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
42
48
  end
43
49
 
44
50
  def test_preprocessing_collection_from_a_collection
45
- Helpers.run_preprocess_collection('collection_without_thresholds.yaml -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent')
51
+ Helpers.run_preprocess_collection('collection_without_thresholds.yaml test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
46
52
  assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
47
53
  end
48
54
  def test_preprocessing_collection_from_a_pcm_collection
49
- Helpers.run_preprocess_collection('collection_pcm_without_thresholds.yaml -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent --pcm')
55
+ Helpers.run_preprocess_collection('collection_pcm_without_thresholds.yaml test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
50
56
  assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
51
57
  end
52
58
 
53
59
  def test_preprocessing_collection_from_stdin_pcm
54
60
  Helpers.provide_stdin('test_collection_pcm/GABPA_f1.pcm test_collection_pcm/KLF4_f2.pcm test_collection_pcm/SP1_f1.pcm'){
55
- Helpers.run_preprocess_collection('.stdin -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent --pcm')
61
+ Helpers.run_preprocess_collection('.stdin test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
56
62
  }
57
63
  assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
58
64
  end
59
-
60
- def test_with_name_specified
61
- Helpers.run_preprocess_collection('test_collection -n my_collection -p 0.0005 0.0001 0.00005 --silent')
62
- assert_equal YAML.load(File.read('test_collection.yaml')).set_parameters(name:'my_collection'), YAML.load(File.read('my_collection.yaml'))
63
- File.delete('my_collection.yaml')
64
- end
65
-
66
- def test_with_name_and_output_specified
67
- Helpers.run_preprocess_collection('test_collection -n my_collection -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent')
68
- assert_equal YAML.load(File.read('test_collection.yaml')).set_parameters(name:'my_collection'), YAML.load(File.read('test_collection.yaml.tmp'))
69
- end
70
65
  end
@@ -1,36 +1,48 @@
1
1
  require_relative 'test_helper'
2
2
 
3
3
  class TestScanCollection < Test::Unit::TestCase
4
+ def setup
5
+ @start_dir = Dir.pwd
6
+ Dir.chdir File.join(File.dirname(__FILE__), 'data')
7
+ end
8
+ def teardown
9
+ Dir.chdir(@start_dir)
10
+ end
11
+
4
12
  def test_scan_pcm
5
- assert_equal File.read('test/data/KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
6
- Helpers.scan_collection_output('test/data/KLF4_f2.pcm test/data/test_collection.yaml --silent --pcm').gsub("\r\n","\n")
13
+ assert_equal File.read('KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
14
+ Helpers.scan_collection_output('KLF4_f2.pcm test_collection.yaml --silent --pcm --boundary lower').gsub("\r\n","\n")
7
15
  end
8
16
  def test_scan_default_cutoff
9
- assert_equal File.read('test/data/KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
10
- Helpers.scan_collection_output('test/data/KLF4_f2.pat test/data/test_collection.yaml --silent').gsub("\r\n","\n")
17
+ assert_equal File.read('KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
18
+ Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --silent --boundary lower').gsub("\r\n","\n")
19
+ end
20
+ def test_scan_weak_threshold
21
+ assert_equal File.read('KLF4_f2_scan_results_weak_threshold.txt').gsub("\r\n", "\n"),
22
+ Helpers.scan_collection_output('KLF4_f2.pwm test_collection_weak.yaml --silent').gsub("\r\n","\n")
11
23
  end
12
24
  def test_scan_and_output_all_results
13
- assert_equal File.read('test/data/KLF4_f2_scan_results_all.txt').gsub("\r\n", "\n"),
14
- Helpers.scan_collection_output('test/data/KLF4_f2.pat test/data/test_collection.yaml --all --silent').gsub("\r\n","\n")
15
-
25
+ assert_equal File.read('KLF4_f2_scan_results_all.txt').gsub("\r\n", "\n"),
26
+ Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --all --silent --boundary lower').gsub("\r\n","\n")
27
+
16
28
  end
17
29
  def test_scan_precise_mode
18
- assert_equal File.read('test/data/KLF4_f2_scan_results_precise_mode.txt').gsub("\r\n","\n"),
19
- Helpers.scan_collection_output('test/data/KLF4_f2.pat test/data/test_collection.yaml --precise --all --silent').gsub("\r\n", "\n")
30
+ assert_equal File.read('KLF4_f2_scan_results_precise_mode.txt').gsub("\r\n","\n"),
31
+ Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
20
32
  end
21
33
  def test_process_query_pwm_from_stdin
22
- assert_equal Helpers.scan_collection_output('test/data/KLF4_f2.pat test/data/test_collection.yaml --silent'),
23
- Helpers.provide_stdin(File.read('test/data/KLF4_f2.pat')) {
24
- Helpers.scan_collection_output('.stdin test/data/test_collection.yaml --silent')
34
+ assert_equal Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --silent --boundary lower'),
35
+ Helpers.provide_stdin(File.read('KLF4_f2.pwm')) {
36
+ Helpers.scan_collection_output('.stdin test_collection.yaml --silent --boundary lower')
25
37
  }
26
38
  end
27
39
 
28
40
  def test_scan_medium_length_motif
29
41
  assert_match /Query motif medium_motif_name gives 0 recognized words for a given P-value of 0\.0005 with the rough discretization level of 1. Forcing precise discretization level of 10/,
30
- Helpers.scan_collection_stderr('test/data/medium_motif.pat test/data/test_collection.yaml --precise --all --silent').gsub("\r\n", "\n")
42
+ Helpers.scan_collection_stderr('medium_motif.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
31
43
  end
32
44
  def test_scan_short_length_motif
33
45
  assert_match /Query motif short_motif_name gives 0 recognized words for a given P-value of 0\.0005 with the precise discretization level of 10\. It.s impossible to scan collection for this motif/,
34
- Helpers.scan_collection_stderr('test/data/short_motif.pat test/data/test_collection.yaml --precise --all --silent').gsub("\r\n", "\n")
46
+ Helpers.scan_collection_stderr('short_motif.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
35
47
  end
36
48
  end