macroape 4.0.2 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +17 -17
  3. data/Gemfile +4 -4
  4. data/LICENSE +22 -22
  5. data/README.md +70 -70
  6. data/Rakefile.rb +49 -49
  7. data/TODO.txt +46 -46
  8. data/benchmark/benchmark_helper.rb +4 -4
  9. data/benchmark/similarity_benchmark.rb +52 -52
  10. data/bin/align_motifs +4 -4
  11. data/bin/eval_alignment +4 -4
  12. data/bin/eval_similarity +4 -4
  13. data/bin/find_pvalue +4 -4
  14. data/bin/find_threshold +4 -4
  15. data/bin/preprocess_collection +4 -4
  16. data/bin/scan_collection +4 -4
  17. data/lib/macroape.rb +14 -11
  18. data/lib/macroape/aligned_pair_intersection.rb +61 -62
  19. data/lib/macroape/cli.rb +191 -188
  20. data/lib/macroape/cli/align_motifs.rb +120 -100
  21. data/lib/macroape/cli/eval_alignment.rb +157 -156
  22. data/lib/macroape/cli/eval_similarity.rb +138 -137
  23. data/lib/macroape/cli/find_pvalue.rb +93 -87
  24. data/lib/macroape/cli/find_threshold.rb +103 -96
  25. data/lib/macroape/cli/preprocess_collection.rb +169 -161
  26. data/lib/macroape/cli/scan_collection.rb +171 -163
  27. data/lib/macroape/collection.rb +29 -0
  28. data/lib/macroape/motif_with_thresholds.rb +18 -0
  29. data/lib/macroape/pwm_compare.rb +39 -44
  30. data/lib/macroape/pwm_compare_aligned.rb +139 -130
  31. data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
  32. data/lib/macroape/support/inverf.rb +13 -0
  33. data/lib/macroape/support/partial_sums.rb +17 -0
  34. data/lib/macroape/version.rb +4 -4
  35. data/macroape.gemspec +19 -19
  36. data/spec/count_distribution_spec.rb +112 -109
  37. data/spec/inverf_spec.rb +23 -0
  38. data/spec/partial_sums_spec.rb +28 -0
  39. data/spec/spec_helper.rb +11 -11
  40. data/test/align_motifs_test.rb +42 -43
  41. data/test/data/AHR_si.pwm +10 -10
  42. data/test/data/KLF3_f1.pcm +16 -16
  43. data/test/data/KLF3_f1.pwm +16 -16
  44. data/test/data/KLF4_f2.pcm +11 -11
  45. data/test/data/KLF4_f2.pwm +11 -11
  46. data/test/data/KLF4_f2_scan_results_all.txt +2 -2
  47. data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
  48. data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
  49. data/test/data/SP1_f1.pcm +12 -12
  50. data/test/data/SP1_f1.pwm +12 -12
  51. data/test/data/SP1_f1_revcomp.pcm +12 -12
  52. data/test/data/SP1_f1_revcomp.pwm +12 -12
  53. data/test/data/medium_motif.pwm +8 -8
  54. data/test/data/short_motif.pwm +7 -7
  55. data/test/data/test_collection.yaml +231 -214
  56. data/test/data/test_collection/GABPA_f1.pwm +14 -14
  57. data/test/data/test_collection/KLF4_f2.pwm +10 -10
  58. data/test/data/test_collection/SP1_f1.pwm +12 -12
  59. data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
  60. data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
  61. data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
  62. data/test/data/test_collection_single_file.txt +38 -38
  63. data/test/data/test_collection_single_file_pcm.txt +37 -37
  64. data/test/data/test_collection_weak.yaml +231 -214
  65. data/test/eval_alignment_test.rb +90 -111
  66. data/test/eval_similarity_test.rb +105 -123
  67. data/test/find_pvalue_test.rb +34 -39
  68. data/test/find_threshold_test.rb +87 -91
  69. data/test/preprocess_collection_test.rb +56 -65
  70. data/test/scan_collection_test.rb +42 -48
  71. data/test/test_helper.rb +159 -160
  72. metadata +14 -10
  73. data/test/data/collection_pcm_without_thresholds.yaml +0 -188
  74. data/test/data/collection_without_thresholds.yaml +0 -188
@@ -1,91 +1,87 @@
1
- require_relative 'test_helper'
2
-
3
- class FindThresholdTest < Test::Unit::TestCase
4
- include Helpers
5
- def setup
6
- @start_dir = Dir.pwd
7
- Dir.chdir File.join(File.dirname(__FILE__), 'data')
8
- end
9
- def teardown
10
- Dir.chdir(@start_dir)
11
- end
12
-
13
- def test_process_one_pvalue_weak_thresold
14
- assert_threshold_info_output({requested_pvalue: 0.001,
15
- real_pvalue: 0.0010004043579101562,
16
- number_of_recognized_words: 1049.0,
17
- threshold: 4.1718},
18
- Helpers.find_threshold_output("KLF4_f2.pwm 0.001 --boundary upper") )
19
- # additional consistency checks
20
- assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.1718"), '0.0010004043579101562'
21
- end
22
-
23
- def test_process_one_pvalue_strong_thresold
24
- assert_threshold_info_output({requested_pvalue: 0.001,
25
- real_pvalue: 0.00099945068359375,
26
- number_of_recognized_words: 1048.0,
27
- threshold: 4.17189},
28
- Helpers.find_threshold_output("KLF4_f2.pwm 0.001") )
29
- # additional consistency checks
30
- assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.17189"), '0.00099945068359375'
31
- end
32
-
33
- def test_process_several_pvalues
34
- pvalues = []
35
- assert_threshold_info_output({requested_pvalue: 0.0005,
36
- real_pvalue: 0.000499725341796875,
37
- number_of_recognized_words: 524.0,
38
- threshold: 5.24071},
39
- {requested_pvalue: 0.001,
40
- real_pvalue: 0.00099945068359375,
41
- number_of_recognized_words: 1048.0,
42
- threshold: 4.17189},
43
- Helpers.find_threshold_output('KLF4_f2.pwm 0.001 0.0005') )
44
- assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.17189"), '0.00099945068359375'
45
- assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 5.24071"), '0.000499725341796875'
46
- end
47
-
48
- def test_process_pcm
49
- assert_equal( Helpers.find_threshold_output("KLF4_f2.pwm"),
50
- Helpers.find_threshold_output("KLF4_f2.pcm --pcm"))
51
- end
52
-
53
- def test_process_default_pvalue
54
- assert_equal( Helpers.find_threshold_output("KLF4_f2.pwm 0.0005"),
55
- Helpers.find_threshold_output("KLF4_f2.pwm"))
56
- end
57
- def test_custom_discretization
58
- assert_threshold_info_output({requested_pvalue: 0.0005,
59
- real_pvalue: 0.0004978179931640625,
60
- number_of_recognized_words: 522.0,
61
- threshold: 5.281000000000001},
62
- Helpers.find_threshold_output("KLF4_f2.pwm -d 100") )
63
- # additional consistency checks
64
- assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 5.281000000000001 -d 100"), '0.0004978179931640625'
65
- end
66
- def test_custom_background
67
- assert_threshold_info_output({requested_pvalue: 0.0005,
68
- real_pvalue: '0.00049964290000001',
69
- threshold: '-0.10449000000000001'},
70
- Helpers.find_threshold_output("KLF4_f2.pwm -b 0.4,0.1,0.1,0.4") )
71
- # additional consistency checks
72
- assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm -0.10449000000000001 -b 0.4,0.1,0.1,0.4"), '0.0004996429000000166' # here real pvalue differs at last digits =\
73
- end
74
- def test_process_pwm_from_stdin
75
- assert_equal Helpers.find_threshold_output('KLF4_f2.pwm'),
76
- Helpers.provide_stdin(File.read('KLF4_f2.pwm')){ Helpers.find_threshold_output('.stdin') }
77
- end
78
-
79
- # TODO: it should be rewritten as a spec for count_distribution_under_pvalue - not to raise an error(log out of domain) and return a value
80
- def test_process_large_pvalue
81
- assert_nothing_raised do
82
- # discretization is set not to take very long time calculation
83
- assert_threshold_info_output({requested_pvalue: 0.8,
84
- real_pvalue: 0.7996518611907959,
85
- number_of_recognized_words: 3353983.0,
86
- threshold: -17.89},
87
- Helpers.find_threshold_output('SP1_f1.pwm 0.8 -d 10') )
88
- end
89
- assert_equal Helpers.obtain_pvalue_by_threshold("SP1_f1.pwm -17.89 -d 10"), '0.7996518611907959'
90
- end
91
- end
1
+ require_relative 'test_helper'
2
+
3
+ class FindThresholdTest < Test::Unit::TestCase
4
+ include Helpers
5
+ def setup
6
+ @start_dir = Dir.pwd
7
+ Dir.chdir File.join(File.dirname(__FILE__), 'data')
8
+ end
9
+ def teardown
10
+ Dir.chdir(@start_dir)
11
+ end
12
+
13
+ def test_process_one_pvalue_weak_thresold
14
+ assert_threshold_info_output({requested_pvalue: 0.001,
15
+ real_pvalue: 0.0010004043579101562,
16
+ number_of_recognized_words: 1049.0,
17
+ threshold: 4.1718},
18
+ Helpers.find_threshold_output("KLF4_f2.pwm 0.001 --boundary upper") )
19
+ # additional consistency checks
20
+ assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.1718"), '0.0010004043579101562'
21
+ end
22
+
23
+ def test_process_one_pvalue_strong_thresold
24
+ assert_threshold_info_output({requested_pvalue: 0.001,
25
+ real_pvalue: 0.00099945068359375,
26
+ number_of_recognized_words: 1048.0,
27
+ threshold: 4.17189},
28
+ Helpers.find_threshold_output("KLF4_f2.pwm 0.001") )
29
+ # additional consistency checks
30
+ assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.17189"), '0.00099945068359375'
31
+ end
32
+
33
+ def test_process_several_pvalues
34
+ pvalues = []
35
+ assert_threshold_info_output({requested_pvalue: 0.0005,
36
+ real_pvalue: 0.000499725341796875,
37
+ number_of_recognized_words: 524.0,
38
+ threshold: 5.24071},
39
+ {requested_pvalue: 0.001,
40
+ real_pvalue: 0.00099945068359375,
41
+ number_of_recognized_words: 1048.0,
42
+ threshold: 4.17189},
43
+ Helpers.find_threshold_output('KLF4_f2.pwm 0.001 0.0005') )
44
+ assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.17189"), '0.00099945068359375'
45
+ assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 5.24071"), '0.000499725341796875'
46
+ end
47
+
48
+ def test_process_pcm
49
+ assert_equal( Helpers.find_threshold_output("KLF4_f2.pwm"),
50
+ Helpers.find_threshold_output("KLF4_f2.pcm --pcm"))
51
+ end
52
+
53
+ def test_process_default_pvalue
54
+ assert_equal( Helpers.find_threshold_output("KLF4_f2.pwm 0.0005"),
55
+ Helpers.find_threshold_output("KLF4_f2.pwm"))
56
+ end
57
+ def test_custom_discretization
58
+ assert_threshold_info_output({requested_pvalue: 0.0005,
59
+ real_pvalue: 0.0004978179931640625,
60
+ number_of_recognized_words: 522.0,
61
+ threshold: 5.281000000000001},
62
+ Helpers.find_threshold_output("KLF4_f2.pwm -d 100") )
63
+ # additional consistency checks
64
+ assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 5.281000000000001 -d 100"), '0.0004978179931640625'
65
+ end
66
+ def test_custom_background
67
+ assert_threshold_info_output({requested_pvalue: 0.0005,
68
+ real_pvalue: '0.00049964290000001',
69
+ threshold: '-0.10449000000000001'},
70
+ Helpers.find_threshold_output("KLF4_f2.pwm -b 0.4,0.1,0.1,0.4") )
71
+ # additional consistency checks
72
+ assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm -0.10449000000000001 -b 0.4,0.1,0.1,0.4"), '0.0004996429000000166' # here real pvalue differs at last digits =\
73
+ end
74
+
75
+ # TODO: it should be rewritten as a spec for count_distribution_under_pvalue - not to raise an error(log out of domain) and return a value
76
+ def test_process_large_pvalue
77
+ assert_nothing_raised do
78
+ # discretization is set not to take very long time calculation
79
+ assert_threshold_info_output({requested_pvalue: 0.8,
80
+ real_pvalue: 0.7996518611907959,
81
+ number_of_recognized_words: 3353983.0,
82
+ threshold: -17.89},
83
+ Helpers.find_threshold_output('SP1_f1.pwm 0.8 -d 10') )
84
+ end
85
+ assert_equal Helpers.obtain_pvalue_by_threshold("SP1_f1.pwm -17.89 -d 10"), '0.7996518611907959'
86
+ end
87
+ end
@@ -1,65 +1,56 @@
1
- require_relative 'test_helper'
2
- require 'yaml'
3
-
4
- # Don't use YAML.load_file() instead of YAML.load(File.read()) because in ruby before v1.9.3 p194
5
- # it doesn't immediately release file descriptor (if I understood error right way) so File.delete fails
6
-
7
- class TestPreprocessCollection < Test::Unit::TestCase
8
- def setup
9
- @start_dir = Dir.pwd
10
- Dir.chdir File.join(File.dirname(__FILE__), 'data')
11
- end
12
- def teardown
13
- File.delete('test_collection.yaml.tmp') if File.exist? 'test_collection.yaml.tmp'
14
- File.delete('my_collection.yaml') if File.exist? 'my_collection.yaml'
15
- Dir.chdir(@start_dir)
16
- end
17
-
18
- def test_weak_thresholds
19
- Helpers.run_preprocess_collection('test_collection test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent')
20
- assert_equal YAML.load(File.read('test_collection_weak.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
21
- end
22
-
23
- def test_multipvalue_preprocessing
24
- Helpers.run_preprocess_collection('test_collection test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
25
- assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
26
- end
27
-
28
- def test_preprocessing_collection_from_a_single_file
29
- Helpers.run_preprocess_collection('test_collection_single_file.txt test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
30
- assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
31
- end
32
-
33
- def test_preprocessing_collection_from_stdin
34
- Helpers.provide_stdin('test_collection/GABPA_f1.pwm test_collection/KLF4_f2.pwm test_collection/SP1_f1.pwm'){
35
- Helpers.run_preprocess_collection('.stdin test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
36
- }
37
- assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
38
- end
39
-
40
- def test_preprocessing_folder_pcm
41
- Helpers.run_preprocess_collection('test_collection_pcm test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
42
- assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
43
- end
44
-
45
- def test_preprocessing_collection_from_a_single_file_pcm
46
- Helpers.run_preprocess_collection('test_collection_single_file_pcm.txt test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
47
- assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
48
- end
49
-
50
- def test_preprocessing_collection_from_a_collection
51
- Helpers.run_preprocess_collection('collection_without_thresholds.yaml test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
52
- assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
53
- end
54
- def test_preprocessing_collection_from_a_pcm_collection
55
- Helpers.run_preprocess_collection('collection_pcm_without_thresholds.yaml test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
56
- assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
57
- end
58
-
59
- def test_preprocessing_collection_from_stdin_pcm
60
- Helpers.provide_stdin('test_collection_pcm/GABPA_f1.pcm test_collection_pcm/KLF4_f2.pcm test_collection_pcm/SP1_f1.pcm'){
61
- Helpers.run_preprocess_collection('.stdin test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
62
- }
63
- assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
64
- end
65
- end
1
+ require_relative 'test_helper'
2
+ require 'yaml'
3
+
4
+ # Don't use YAML.load_file() instead of YAML.load(File.read()) because in ruby before v1.9.3 p194
5
+ # it doesn't immediately release file descriptor (if I understood error right way) so File.delete fails
6
+
7
+ class TestPreprocessCollection < Test::Unit::TestCase
8
+ def setup
9
+ @start_dir = Dir.pwd
10
+ Dir.chdir File.join(File.dirname(__FILE__), 'data')
11
+ end
12
+ def teardown
13
+ File.delete('test_collection.yaml.tmp') if File.exist? 'test_collection.yaml.tmp'
14
+ File.delete('my_collection.yaml') if File.exist? 'my_collection.yaml'
15
+ Dir.chdir(@start_dir)
16
+ end
17
+
18
+ def test_weak_thresholds
19
+ Helpers.run_preprocess_collection('test_collection test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent')
20
+ assert_equal YAML.load(File.read('test_collection_weak.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
21
+ end
22
+
23
+ def test_multipvalue_preprocessing
24
+ Helpers.run_preprocess_collection('test_collection test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
25
+ assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
26
+ end
27
+
28
+ def test_preprocessing_collection_from_a_single_file
29
+ Helpers.run_preprocess_collection('test_collection_single_file.txt test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
30
+ assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
31
+ end
32
+
33
+ def test_preprocessing_collection_from_stdin
34
+ Helpers.provide_stdin('test_collection/GABPA_f1.pwm test_collection/KLF4_f2.pwm test_collection/SP1_f1.pwm'){
35
+ Helpers.run_preprocess_collection('.stdin test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
36
+ }
37
+ assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
38
+ end
39
+
40
+ def test_preprocessing_folder_pcm
41
+ Helpers.run_preprocess_collection('test_collection_pcm test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
42
+ assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
43
+ end
44
+
45
+ def test_preprocessing_collection_from_a_single_file_pcm
46
+ Helpers.run_preprocess_collection('test_collection_single_file_pcm.txt test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
47
+ assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
48
+ end
49
+
50
+ def test_preprocessing_collection_from_stdin_pcm
51
+ Helpers.provide_stdin('test_collection_pcm/GABPA_f1.pcm test_collection_pcm/KLF4_f2.pcm test_collection_pcm/SP1_f1.pcm'){
52
+ Helpers.run_preprocess_collection('.stdin test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
53
+ }
54
+ assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
55
+ end
56
+ end
@@ -1,48 +1,42 @@
1
- require_relative 'test_helper'
2
-
3
- class TestScanCollection < Test::Unit::TestCase
4
- def setup
5
- @start_dir = Dir.pwd
6
- Dir.chdir File.join(File.dirname(__FILE__), 'data')
7
- end
8
- def teardown
9
- Dir.chdir(@start_dir)
10
- end
11
-
12
- def test_scan_pcm
13
- assert_equal File.read('KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
14
- Helpers.scan_collection_output('KLF4_f2.pcm test_collection.yaml --silent --pcm --boundary lower').gsub("\r\n","\n")
15
- end
16
- def test_scan_default_cutoff
17
- assert_equal File.read('KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
18
- Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --silent --boundary lower').gsub("\r\n","\n")
19
- end
20
- def test_scan_weak_threshold
21
- assert_equal File.read('KLF4_f2_scan_results_weak_threshold.txt').gsub("\r\n", "\n"),
22
- Helpers.scan_collection_output('KLF4_f2.pwm test_collection_weak.yaml --silent').gsub("\r\n","\n")
23
- end
24
- def test_scan_and_output_all_results
25
- assert_equal File.read('KLF4_f2_scan_results_all.txt').gsub("\r\n", "\n"),
26
- Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --all --silent --boundary lower').gsub("\r\n","\n")
27
-
28
- end
29
- def test_scan_precise_mode
30
- assert_equal File.read('KLF4_f2_scan_results_precise_mode.txt').gsub("\r\n","\n"),
31
- Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
32
- end
33
- def test_process_query_pwm_from_stdin
34
- assert_equal Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --silent --boundary lower'),
35
- Helpers.provide_stdin(File.read('KLF4_f2.pwm')) {
36
- Helpers.scan_collection_output('.stdin test_collection.yaml --silent --boundary lower')
37
- }
38
- end
39
-
40
- def test_scan_medium_length_motif
41
- assert_match /Query motif medium_motif_name gives 0 recognized words for a given P-value of 0\.0005 with the rough discretization level of 1. Forcing precise discretization level of 10/,
42
- Helpers.scan_collection_stderr('medium_motif.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
43
- end
44
- def test_scan_short_length_motif
45
- assert_match /Query motif short_motif_name gives 0 recognized words for a given P-value of 0\.0005 with the precise discretization level of 10\. It.s impossible to scan collection for this motif/,
46
- Helpers.scan_collection_stderr('short_motif.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
47
- end
48
- end
1
+ require_relative 'test_helper'
2
+
3
+ class TestScanCollection < Test::Unit::TestCase
4
+ def setup
5
+ @start_dir = Dir.pwd
6
+ Dir.chdir File.join(File.dirname(__FILE__), 'data')
7
+ end
8
+ def teardown
9
+ Dir.chdir(@start_dir)
10
+ end
11
+
12
+ def test_scan_pcm
13
+ assert_equal File.read('KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
14
+ Helpers.scan_collection_output('KLF4_f2.pcm test_collection.yaml --silent --pcm --boundary lower').gsub("\r\n","\n")
15
+ end
16
+ def test_scan_default_cutoff
17
+ assert_equal File.read('KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
18
+ Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --silent --boundary lower').gsub("\r\n","\n")
19
+ end
20
+ def test_scan_weak_threshold
21
+ assert_equal File.read('KLF4_f2_scan_results_weak_threshold.txt').gsub("\r\n", "\n"),
22
+ Helpers.scan_collection_output('KLF4_f2.pwm test_collection_weak.yaml --silent').gsub("\r\n","\n")
23
+ end
24
+ def test_scan_and_output_all_results
25
+ assert_equal File.read('KLF4_f2_scan_results_all.txt').gsub("\r\n", "\n"),
26
+ Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --all --silent --boundary lower').gsub("\r\n","\n")
27
+
28
+ end
29
+ def test_scan_precise_mode
30
+ assert_equal File.read('KLF4_f2_scan_results_precise_mode.txt').gsub("\r\n","\n"),
31
+ Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
32
+ end
33
+
34
+ def test_scan_medium_length_motif
35
+ assert_match /Query motif medium_motif_name gives 0 recognized words for a given P-value of 0\.0005 with the rough discretization level of 1. Forcing precise discretization level of 10/,
36
+ Helpers.scan_collection_stderr('medium_motif.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
37
+ end
38
+ def test_scan_short_length_motif
39
+ assert_match /Query motif short_motif_name gives 0 recognized words for a given P-value of 0\.0005 with the precise discretization level of 10\. It.s impossible to scan collection for this motif/,
40
+ Helpers.scan_collection_stderr('short_motif.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
41
+ end
42
+ end
@@ -1,160 +1,159 @@
1
- $bioinform_folder = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'bioinform', 'lib'))
2
- $LOAD_PATH.unshift $bioinform_folder
3
-
4
- require 'test/unit'
5
- require 'stringio'
6
- require 'shellwords'
7
-
8
- require_relative '../lib/macroape/cli/find_threshold'
9
- require_relative '../lib/macroape/cli/find_pvalue'
10
- require_relative '../lib/macroape/cli/eval_similarity'
11
- require_relative '../lib/macroape/cli/eval_alignment'
12
- require_relative '../lib/macroape/cli/preprocess_collection'
13
- require_relative '../lib/macroape/cli/scan_collection'
14
- require_relative '../lib/macroape/cli/align_motifs'
15
-
16
- module Helpers
17
- # from minitest
18
- def self.capture_io(&block)
19
- orig_stdout, orig_stderr = $stdout, $stderr
20
- captured_stdout, captured_stderr = StringIO.new, StringIO.new
21
- $stdout, $stderr = captured_stdout, captured_stderr
22
- yield
23
- return {stdout: captured_stdout.string, stderr: captured_stderr.string}
24
- ensure
25
- $stdout = orig_stdout
26
- $stderr = orig_stderr
27
- end
28
-
29
- def self.suppress_output(&block)
30
- orig_stdout, orig_stderr = $stdout, $stderr
31
- captured_stdout, captured_stderr = StringIO.new, StringIO.new
32
- $stdout, $stderr = captured_stdout, captured_stderr
33
- yield
34
- ensure
35
- $stdout = orig_stdout
36
- $stderr = orig_stderr
37
- end
38
-
39
- # Method stubs $stdin not STDIN !
40
- def self.provide_stdin(input, &block)
41
- orig_stdin = $stdin
42
- $stdin = StringIO.new(input)
43
- yield
44
- ensure
45
- $stdin = orig_stdin
46
- end
47
-
48
- def self.capture_output(&block)
49
- capture_io(&block)[:stdout]
50
- end
51
- def self.capture_stderr(&block)
52
- capture_io(&block)[:stderr]
53
- end
54
-
55
- # aaa\tbbb\nccc\tddd ==> [['aaa','bbb'],['ccc','ddd']]
56
- def self.split_on_lines(str)
57
- str.lines.map{|line| line.strip.split("\t")}
58
- end
59
-
60
- def self.obtain_pvalue_by_threshold(args)
61
- find_pvalue_output(args).last.last
62
- end
63
- def self.exec_cmd(executable, param_list)
64
- "ruby -I #{$lib_folder} #{$lib_folder}/../bin/#{executable} #{param_list}"
65
- end
66
- def self.find_threshold_output(param_list)
67
- capture_output{ Macroape::CLI::FindThreshold.main(param_list.shellsplit) }
68
- end
69
- def self.align_motifs_output(param_list)
70
- split_on_lines( capture_output{ Macroape::CLI::AlignMotifs.main(param_list.shellsplit)} )
71
- end
72
- def self.find_pvalue_output(param_list)
73
- capture_output{ Macroape::CLI::FindPValue.main(param_list.shellsplit)} .lines.to_a.map(&:strip).reject{|line| line.start_with? '#' }.reject(&:empty?).map{|line|line.split("\t")}
74
- end
75
- def self.eval_similarity_output(param_list)
76
- capture_output{ Macroape::CLI::EvalSimilarity.main(param_list.shellsplit)}
77
- end
78
- def self.eval_alignment_output(param_list)
79
- capture_output{ Macroape::CLI::EvalAlignment.main(param_list.shellsplit)}
80
- end
81
- def self.scan_collection_output(param_list)
82
- capture_output{ Macroape::CLI::ScanCollection.main(param_list.shellsplit) }.lines.to_a.map(&:strip).reject{|line| line.start_with? '#' }.reject(&:empty?).join("\n")
83
- end
84
- def self.scan_collection_stderr(param_list)
85
- capture_stderr{ Macroape::CLI::ScanCollection.main(param_list.shellsplit) }
86
- end
87
- def self.run_preprocess_collection(param_list)
88
- suppress_output{ Macroape::CLI::PreprocessCollection.main(param_list.shellsplit) }
89
- end
90
-
91
- def parse_similarity_infos_string(info_string)
92
- infos = {}
93
- info_string.lines.map(&:strip).reject{|line| line.start_with?('#')}.reject(&:empty?).each do |line|
94
- key, value = line.split
95
- case key
96
- when 'S' then infos[:similarity] = value
97
- when 'D' then infos[:distance] = value
98
- when 'L' then infos[:length] = value
99
- when 'SH' then infos[:shift] = value
100
- when 'OR' then infos[:orientation] = value
101
- when 'W' then infos[:words_recognized_by_both] = value
102
-
103
- when 'W1' then infos[:words_recognized_by_first] = value
104
- when 'P1' then infos[:pvalue_recognized_by_first] = value
105
- when 'T1' then infos[:threshold_first] = value
106
-
107
- when 'W2' then infos[:words_recognized_by_second] = value
108
- when 'P2' then infos[:pvalue_recognized_by_second] = value
109
- when 'T2' then infos[:threshold_second] = value
110
-
111
- when 'A1' then infos[:matrix_first_alignment] = value
112
- when 'A2' then infos[:matrix_second_alignment] = value
113
-
114
- when 'V' then infos[:discretization] = value
115
- end
116
- end
117
- infos
118
- end
119
-
120
- def assert_similarity_info_output(expected_info, info_string)
121
- infos = parse_similarity_infos_string(info_string)
122
- expected_info.each do |key, value|
123
- assert_equal value.to_s, infos[key]
124
- end
125
- end
126
-
127
- def parse_threshold_infos_string(infos_string)
128
- infos = []
129
- infos_string.lines.map(&:strip).reject{|line| line.start_with?('#')}.reject(&:empty?).each do |line|
130
- info_data = line.split
131
- if info_data.size == 4
132
- requested_pvalue, real_pvalue, number_of_recognized_words, threshold = info_data
133
- info = {requested_pvalue: requested_pvalue,
134
- real_pvalue: real_pvalue,
135
- number_of_recognized_words: number_of_recognized_words,
136
- threshold: threshold }
137
- elsif info_data.size == 3
138
- requested_pvalue, real_pvalue, threshold = info_data
139
- info = {requested_pvalue: requested_pvalue,
140
- real_pvalue: real_pvalue,
141
- threshold: threshold }
142
- else
143
- raise 'can\'t parse threshold infos table'
144
- end
145
- infos << info
146
- end
147
- infos
148
- end
149
-
150
- def assert_threshold_info_output(*expected_infos, info_string)
151
- infos = parse_threshold_infos_string(info_string)
152
- expected_infos.zip(infos).each do |expected_info, info|
153
- assert_not_nil info
154
- expected_info.each do |key, value|
155
- assert_equal value.to_s, info[key]
156
- end
157
- end
158
- end
159
-
160
- end
1
+ $bioinform_folder = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'bioinform', 'lib'))
2
+ $LOAD_PATH.unshift $bioinform_folder
3
+
4
+ require 'test/unit'
5
+ require 'stringio'
6
+ require 'shellwords'
7
+
8
+ require_relative '../lib/macroape/cli/find_threshold'
9
+ require_relative '../lib/macroape/cli/find_pvalue'
10
+ require_relative '../lib/macroape/cli/eval_similarity'
11
+ require_relative '../lib/macroape/cli/eval_alignment'
12
+ require_relative '../lib/macroape/cli/preprocess_collection'
13
+ require_relative '../lib/macroape/cli/scan_collection'
14
+ require_relative '../lib/macroape/cli/align_motifs'
15
+
16
+ module Helpers
17
+ # from minitest
18
+ def self.capture_io(&block)
19
+ orig_stdout, orig_stderr = $stdout, $stderr
20
+ captured_stdout, captured_stderr = StringIO.new, StringIO.new
21
+ $stdout, $stderr = captured_stdout, captured_stderr
22
+ yield
23
+ return {stdout: captured_stdout.string, stderr: captured_stderr.string}
24
+ ensure
25
+ $stdout = orig_stdout
26
+ $stderr = orig_stderr
27
+ end
28
+
29
+ def self.suppress_output(&block)
30
+ orig_stdout, orig_stderr = $stdout, $stderr
31
+ captured_stdout, captured_stderr = StringIO.new, StringIO.new
32
+ $stdout, $stderr = captured_stdout, captured_stderr
33
+ yield
34
+ ensure
35
+ $stdout = orig_stdout
36
+ $stderr = orig_stderr
37
+ end
38
+
39
+ # Method stubs $stdin not STDIN !
40
+ def self.provide_stdin(input, &block)
41
+ orig_stdin = $stdin
42
+ $stdin = StringIO.new(input)
43
+ yield
44
+ ensure
45
+ $stdin = orig_stdin
46
+ end
47
+
48
+ def self.capture_output(&block)
49
+ capture_io(&block)[:stdout]
50
+ end
51
+ def self.capture_stderr(&block)
52
+ capture_io(&block)[:stderr]
53
+ end
54
+
55
+ # aaa\tbbb\nccc\tddd ==> [['aaa','bbb'],['ccc','ddd']]
56
+ def self.split_on_lines(str)
57
+ str.lines.map{|line| line.strip.split("\t")}
58
+ end
59
+
60
+ def self.obtain_pvalue_by_threshold(args)
61
+ find_pvalue_output(args).last.last
62
+ end
63
+ def self.exec_cmd(executable, param_list)
64
+ "ruby -I #{$lib_folder} #{$lib_folder}/../bin/#{executable} #{param_list}"
65
+ end
66
+ def self.find_threshold_output(param_list)
67
+ capture_output{ Macroape::CLI::FindThreshold.main(param_list.shellsplit) }
68
+ end
69
+ def self.align_motifs_output(param_list)
70
+ split_on_lines( capture_output{ Macroape::CLI::AlignMotifs.main(param_list.shellsplit)} )
71
+ end
72
+ def self.find_pvalue_output(param_list)
73
+ capture_output{ Macroape::CLI::FindPValue.main(param_list.shellsplit)} .lines.to_a.map(&:strip).reject{|line| line.start_with? '#' }.reject(&:empty?).map{|line|line.split("\t")}
74
+ end
75
+ def self.eval_similarity_output(param_list)
76
+ capture_output{ Macroape::CLI::EvalSimilarity.main(param_list.shellsplit)}
77
+ end
78
+ def self.eval_alignment_output(param_list)
79
+ capture_output{ Macroape::CLI::EvalAlignment.main(param_list.shellsplit)}
80
+ end
81
+ def self.scan_collection_output(param_list)
82
+ capture_output{ Macroape::CLI::ScanCollection.main(param_list.shellsplit) }.lines.to_a.map(&:strip).reject{|line| line.start_with? '#' }.reject(&:empty?).join("\n")
83
+ end
84
+ def self.scan_collection_stderr(param_list)
85
+ capture_stderr{ Macroape::CLI::ScanCollection.main(param_list.shellsplit) }
86
+ end
87
+ def self.run_preprocess_collection(param_list)
88
+ suppress_output{ Macroape::CLI::PreprocessCollection.main(param_list.shellsplit) }
89
+ end
90
+
91
+ def parse_similarity_infos_string(info_string)
92
+ infos = {}
93
+ info_string.lines.map(&:strip).reject{|line| line.start_with?('#')}.reject(&:empty?).each do |line|
94
+ key, value = line.split
95
+ case key
96
+ when 'S' then infos[:similarity] = value
97
+ when 'D' then infos[:distance] = value
98
+ when 'L' then infos[:length] = value
99
+ when 'SH' then infos[:shift] = value
100
+ when 'OR' then infos[:orientation] = value
101
+ when 'W' then infos[:words_recognized_by_both] = value
102
+
103
+ when 'W1' then infos[:words_recognized_by_first] = value
104
+ when 'P1' then infos[:pvalue_recognized_by_first] = value
105
+ when 'T1' then infos[:threshold_first] = value
106
+
107
+ when 'W2' then infos[:words_recognized_by_second] = value
108
+ when 'P2' then infos[:pvalue_recognized_by_second] = value
109
+ when 'T2' then infos[:threshold_second] = value
110
+
111
+ when 'A1' then infos[:matrix_first_alignment] = value
112
+ when 'A2' then infos[:matrix_second_alignment] = value
113
+
114
+ when 'V' then infos[:discretization] = value
115
+ end
116
+ end
117
+ infos
118
+ end
119
+
120
+ def assert_similarity_info_output(expected_info, info_string)
121
+ infos = parse_similarity_infos_string(info_string)
122
+ expected_info.each do |key, value|
123
+ assert_equal value.to_s, infos[key]
124
+ end
125
+ end
126
+
127
+ def parse_threshold_infos_string(infos_string)
128
+ infos = []
129
+ infos_string.lines.map(&:strip).reject{|line| line.start_with?('#')}.reject(&:empty?).each do |line|
130
+ info_data = line.split
131
+ if info_data.size == 4
132
+ requested_pvalue, real_pvalue, number_of_recognized_words, threshold = info_data
133
+ info = {requested_pvalue: requested_pvalue,
134
+ real_pvalue: real_pvalue,
135
+ number_of_recognized_words: number_of_recognized_words,
136
+ threshold: threshold }
137
+ elsif info_data.size == 3
138
+ requested_pvalue, real_pvalue, threshold = info_data
139
+ info = {requested_pvalue: requested_pvalue,
140
+ real_pvalue: real_pvalue,
141
+ threshold: threshold }
142
+ else
143
+ raise 'can\'t parse threshold infos table'
144
+ end
145
+ infos << info
146
+ end
147
+ infos
148
+ end
149
+
150
+ def assert_threshold_info_output(*expected_infos, info_string)
151
+ infos = parse_threshold_infos_string(info_string)
152
+ expected_infos.zip(infos).each do |expected_info, info|
153
+ assert_not_nil info
154
+ expected_info.each do |key, value|
155
+ assert_equal value.to_s, info[key]
156
+ end
157
+ end
158
+ end
159
+ end