RubyGems - macroape - Versions diffs - 4.0.2 → 4.1.0 - Mend

macroape 4.0.2 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

checksums.yaml +4 -4
data/.gitignore +17 -17
data/Gemfile +4 -4
data/LICENSE +22 -22
data/README.md +70 -70
data/Rakefile.rb +49 -49
data/TODO.txt +46 -46
data/benchmark/benchmark_helper.rb +4 -4
data/benchmark/similarity_benchmark.rb +52 -52
data/bin/align_motifs +4 -4
data/bin/eval_alignment +4 -4
data/bin/eval_similarity +4 -4
data/bin/find_pvalue +4 -4
data/bin/find_threshold +4 -4
data/bin/preprocess_collection +4 -4
data/bin/scan_collection +4 -4
data/lib/macroape.rb +14 -11
data/lib/macroape/aligned_pair_intersection.rb +61 -62
data/lib/macroape/cli.rb +191 -188
data/lib/macroape/cli/align_motifs.rb +120 -100
data/lib/macroape/cli/eval_alignment.rb +157 -156
data/lib/macroape/cli/eval_similarity.rb +138 -137
data/lib/macroape/cli/find_pvalue.rb +93 -87
data/lib/macroape/cli/find_threshold.rb +103 -96
data/lib/macroape/cli/preprocess_collection.rb +169 -161
data/lib/macroape/cli/scan_collection.rb +171 -163
data/lib/macroape/collection.rb +29 -0
data/lib/macroape/motif_with_thresholds.rb +18 -0
data/lib/macroape/pwm_compare.rb +39 -44
data/lib/macroape/pwm_compare_aligned.rb +139 -130
data/lib/macroape/{counting.rb → pwm_counting.rb} +175 -121
data/lib/macroape/support/inverf.rb +13 -0
data/lib/macroape/support/partial_sums.rb +17 -0
data/lib/macroape/version.rb +4 -4
data/macroape.gemspec +19 -19
data/spec/count_distribution_spec.rb +112 -109
data/spec/inverf_spec.rb +23 -0
data/spec/partial_sums_spec.rb +28 -0
data/spec/spec_helper.rb +11 -11
data/test/align_motifs_test.rb +42 -43
data/test/data/AHR_si.pwm +10 -10
data/test/data/KLF3_f1.pcm +16 -16
data/test/data/KLF3_f1.pwm +16 -16
data/test/data/KLF4_f2.pcm +11 -11
data/test/data/KLF4_f2.pwm +11 -11
data/test/data/KLF4_f2_scan_results_all.txt +2 -2
data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -1
data/test/data/KLF4_f2_scan_results_precise_mode.txt +2 -2
data/test/data/SP1_f1.pcm +12 -12
data/test/data/SP1_f1.pwm +12 -12
data/test/data/SP1_f1_revcomp.pcm +12 -12
data/test/data/SP1_f1_revcomp.pwm +12 -12
data/test/data/medium_motif.pwm +8 -8
data/test/data/short_motif.pwm +7 -7
data/test/data/test_collection.yaml +231 -214
data/test/data/test_collection/GABPA_f1.pwm +14 -14
data/test/data/test_collection/KLF4_f2.pwm +10 -10
data/test/data/test_collection/SP1_f1.pwm +12 -12
data/test/data/test_collection_pcm/GABPA_f1.pcm +14 -14
data/test/data/test_collection_pcm/KLF4_f2.pcm +11 -11
data/test/data/test_collection_pcm/SP1_f1.pcm +12 -12
data/test/data/test_collection_single_file.txt +38 -38
data/test/data/test_collection_single_file_pcm.txt +37 -37
data/test/data/test_collection_weak.yaml +231 -214
data/test/eval_alignment_test.rb +90 -111
data/test/eval_similarity_test.rb +105 -123
data/test/find_pvalue_test.rb +34 -39
data/test/find_threshold_test.rb +87 -91
data/test/preprocess_collection_test.rb +56 -65
data/test/scan_collection_test.rb +42 -48
data/test/test_helper.rb +159 -160
metadata +14 -10
data/test/data/collection_pcm_without_thresholds.yaml +0 -188
data/test/data/collection_without_thresholds.yaml +0 -188

data/test/find_threshold_test.rb CHANGED

@@ -1,91 +1,87 @@
-require_relative 'test_helper'
-class FindThresholdTest < Test::Unit::TestCase
-  include Helpers
-  def setup
-    @start_dir = Dir.pwd
-    Dir.chdir File.join(File.dirname(__FILE__), 'data')
-  end
-  def teardown
-    Dir.chdir(@start_dir)
-  end
-  def test_process_one_pvalue_weak_thresold
-    assert_threshold_info_output({requested_pvalue: 0.001,
-                                  real_pvalue: 0.0010004043579101562,
-                                  number_of_recognized_words: 1049.0,
-                                  threshold: 4.1718},
-                                  Helpers.find_threshold_output("KLF4_f2.pwm 0.001 --boundary upper") )
-    # additional consistency checks
-    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.1718"), '0.0010004043579101562'
-  end
-  def test_process_one_pvalue_strong_thresold
-    assert_threshold_info_output({requested_pvalue: 0.001,
-                                  real_pvalue: 0.00099945068359375,
-                                  number_of_recognized_words: 1048.0,
-                                  threshold: 4.17189},
-                                  Helpers.find_threshold_output("KLF4_f2.pwm 0.001") )
-    # additional consistency checks
-    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.17189"), '0.00099945068359375'
-  end
-  def test_process_several_pvalues
-    pvalues = []
-    assert_threshold_info_output({requested_pvalue: 0.0005,
-                                  real_pvalue: 0.000499725341796875,
-                                  number_of_recognized_words: 524.0,
-                                  threshold: 5.24071},
-                                  {requested_pvalue: 0.001,
-                                  real_pvalue: 0.00099945068359375,
-                                  number_of_recognized_words: 1048.0,
-                                  threshold: 4.17189},
-                                  Helpers.find_threshold_output('KLF4_f2.pwm 0.001 0.0005') )
-    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.17189"), '0.00099945068359375'
-    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 5.24071"), '0.000499725341796875'
-  end
-  def test_process_pcm
-    assert_equal( Helpers.find_threshold_output("KLF4_f2.pwm"),
-                  Helpers.find_threshold_output("KLF4_f2.pcm --pcm"))
-  end
-  def test_process_default_pvalue
-    assert_equal( Helpers.find_threshold_output("KLF4_f2.pwm 0.0005"),
-                  Helpers.find_threshold_output("KLF4_f2.pwm"))
-  end
-  def test_custom_discretization
-    assert_threshold_info_output({requested_pvalue: 0.0005,
-                                  real_pvalue: 0.0004978179931640625,
-                                  number_of_recognized_words: 522.0,
-                                  threshold: 5.281000000000001},
-                                  Helpers.find_threshold_output("KLF4_f2.pwm -d 100") )
-    # additional consistency checks
-    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 5.281000000000001 -d 100"), '0.0004978179931640625'
-  end
-  def test_custom_background
-    assert_threshold_info_output({requested_pvalue: 0.0005,
-                                  real_pvalue: '0.00049964290000001',
-                                  threshold: '-0.10449000000000001'},
-                                  Helpers.find_threshold_output("KLF4_f2.pwm -b 0.4,0.1,0.1,0.4") )
-    # additional consistency checks
-    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm -0.10449000000000001 -b 0.4,0.1,0.1,0.4"), '0.0004996429000000166' # here real pvalue differs at last digits =\
-  end
-  def test_process_pwm_from_stdin
-    assert_equal Helpers.find_threshold_output('KLF4_f2.pwm'),
-                Helpers.provide_stdin(File.read('KLF4_f2.pwm')){ Helpers.find_threshold_output('.stdin') }
-  end
-  # TODO: it should be rewritten as a spec for count_distribution_under_pvalue - not to raise an error(log out of domain) and return a value
-  def test_process_large_pvalue
-    assert_nothing_raised do
-      # discretization is set not to take very long time calculation
-      assert_threshold_info_output({requested_pvalue: 0.8,
-                                  real_pvalue: 0.7996518611907959,
-                                  number_of_recognized_words: 3353983.0,
-                                  threshold: -17.89},
-                                  Helpers.find_threshold_output('SP1_f1.pwm 0.8 -d 10') )
-    end
-    assert_equal Helpers.obtain_pvalue_by_threshold("SP1_f1.pwm -17.89 -d 10"), '0.7996518611907959'
-  end
-end
+require_relative 'test_helper'
+class FindThresholdTest < Test::Unit::TestCase
+  include Helpers
+  def setup
+    @start_dir = Dir.pwd
+    Dir.chdir File.join(File.dirname(__FILE__), 'data')
+  end
+  def teardown
+    Dir.chdir(@start_dir)
+  end
+  def test_process_one_pvalue_weak_thresold
+    assert_threshold_info_output({requested_pvalue: 0.001,
+                                  real_pvalue: 0.0010004043579101562,
+                                  number_of_recognized_words: 1049.0,
+                                  threshold: 4.1718},
+                                  Helpers.find_threshold_output("KLF4_f2.pwm 0.001 --boundary upper") )
+    # additional consistency checks
+    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.1718"), '0.0010004043579101562'
+  end
+  def test_process_one_pvalue_strong_thresold
+    assert_threshold_info_output({requested_pvalue: 0.001,
+                                  real_pvalue: 0.00099945068359375,
+                                  number_of_recognized_words: 1048.0,
+                                  threshold: 4.17189},
+                                  Helpers.find_threshold_output("KLF4_f2.pwm 0.001") )
+    # additional consistency checks
+    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.17189"), '0.00099945068359375'
+  end
+  def test_process_several_pvalues
+    pvalues = []
+    assert_threshold_info_output({requested_pvalue: 0.0005,
+                                  real_pvalue: 0.000499725341796875,
+                                  number_of_recognized_words: 524.0,
+                                  threshold: 5.24071},
+                                  {requested_pvalue: 0.001,
+                                  real_pvalue: 0.00099945068359375,
+                                  number_of_recognized_words: 1048.0,
+                                  threshold: 4.17189},
+                                  Helpers.find_threshold_output('KLF4_f2.pwm 0.001 0.0005') )
+    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.17189"), '0.00099945068359375'
+    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 5.24071"), '0.000499725341796875'
+  end
+  def test_process_pcm
+    assert_equal( Helpers.find_threshold_output("KLF4_f2.pwm"),
+                  Helpers.find_threshold_output("KLF4_f2.pcm --pcm"))
+  end
+  def test_process_default_pvalue
+    assert_equal( Helpers.find_threshold_output("KLF4_f2.pwm 0.0005"),
+                  Helpers.find_threshold_output("KLF4_f2.pwm"))
+  end
+  def test_custom_discretization
+    assert_threshold_info_output({requested_pvalue: 0.0005,
+                                  real_pvalue: 0.0004978179931640625,
+                                  number_of_recognized_words: 522.0,
+                                  threshold: 5.281000000000001},
+                                  Helpers.find_threshold_output("KLF4_f2.pwm -d 100") )
+    # additional consistency checks
+    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 5.281000000000001 -d 100"), '0.0004978179931640625'
+  end
+  def test_custom_background
+    assert_threshold_info_output({requested_pvalue: 0.0005,
+                                  real_pvalue: '0.00049964290000001',
+                                  threshold: '-0.10449000000000001'},
+                                  Helpers.find_threshold_output("KLF4_f2.pwm -b 0.4,0.1,0.1,0.4") )
+    # additional consistency checks
+    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm -0.10449000000000001 -b 0.4,0.1,0.1,0.4"), '0.0004996429000000166' # here real pvalue differs at last digits =\
+  end
+  # TODO: it should be rewritten as a spec for count_distribution_under_pvalue - not to raise an error(log out of domain) and return a value
+  def test_process_large_pvalue
+    assert_nothing_raised do
+      # discretization is set not to take very long time calculation
+      assert_threshold_info_output({requested_pvalue: 0.8,
+                                  real_pvalue: 0.7996518611907959,
+                                  number_of_recognized_words: 3353983.0,
+                                  threshold: -17.89},
+                                  Helpers.find_threshold_output('SP1_f1.pwm 0.8 -d 10') )
+    end
+    assert_equal Helpers.obtain_pvalue_by_threshold("SP1_f1.pwm -17.89 -d 10"), '0.7996518611907959'
+  end
+end

data/test/preprocess_collection_test.rb CHANGED

@@ -1,65 +1,56 @@
-require_relative 'test_helper'
-require 'yaml'
-# Don't use YAML.load_file() instead of YAML.load(File.read()) because in ruby before v1.9.3 p194
-# it doesn't immediately release file descriptor (if I understood error right way) so File.delete fails
-class TestPreprocessCollection < Test::Unit::TestCase
-  def setup
-    @start_dir = Dir.pwd
-    Dir.chdir File.join(File.dirname(__FILE__), 'data')
-  end
-  def teardown
-    File.delete('test_collection.yaml.tmp')  if File.exist? 'test_collection.yaml.tmp'
-    File.delete('my_collection.yaml')  if File.exist? 'my_collection.yaml'
-    Dir.chdir(@start_dir)
-  end
-  def test_weak_thresholds
-    Helpers.run_preprocess_collection('test_collection test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent')
-    assert_equal YAML.load(File.read('test_collection_weak.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
-  end
-  def test_multipvalue_preprocessing
-    Helpers.run_preprocess_collection('test_collection test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
-    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
-  end
-  def test_preprocessing_collection_from_a_single_file
-    Helpers.run_preprocess_collection('test_collection_single_file.txt test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
-    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
-  end
-  def test_preprocessing_collection_from_stdin
-    Helpers.provide_stdin('test_collection/GABPA_f1.pwm  test_collection/KLF4_f2.pwm  test_collection/SP1_f1.pwm'){
-      Helpers.run_preprocess_collection('.stdin test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
-    }
-    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
-  end
-  def test_preprocessing_folder_pcm
-    Helpers.run_preprocess_collection('test_collection_pcm test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
-    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
-  end
-  def test_preprocessing_collection_from_a_single_file_pcm
-    Helpers.run_preprocess_collection('test_collection_single_file_pcm.txt test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
-    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
-  end
-  def test_preprocessing_collection_from_a_collection
-    Helpers.run_preprocess_collection('collection_without_thresholds.yaml test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
-    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
-  end
-  def test_preprocessing_collection_from_a_pcm_collection
-    Helpers.run_preprocess_collection('collection_pcm_without_thresholds.yaml test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
-    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
-  end
-  def test_preprocessing_collection_from_stdin_pcm
-    Helpers.provide_stdin('test_collection_pcm/GABPA_f1.pcm  test_collection_pcm/KLF4_f2.pcm  test_collection_pcm/SP1_f1.pcm'){
-      Helpers.run_preprocess_collection('.stdin test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
-    }
-    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
-  end
-end
+require_relative 'test_helper'
+require 'yaml'
+# Don't use YAML.load_file() instead of YAML.load(File.read()) because in ruby before v1.9.3 p194
+# it doesn't immediately release file descriptor (if I understood error right way) so File.delete fails
+class TestPreprocessCollection < Test::Unit::TestCase
+  def setup
+    @start_dir = Dir.pwd
+    Dir.chdir File.join(File.dirname(__FILE__), 'data')
+  end
+  def teardown
+    File.delete('test_collection.yaml.tmp')  if File.exist? 'test_collection.yaml.tmp'
+    File.delete('my_collection.yaml')  if File.exist? 'my_collection.yaml'
+    Dir.chdir(@start_dir)
+  end
+  def test_weak_thresholds
+    Helpers.run_preprocess_collection('test_collection test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent')
+    assert_equal YAML.load(File.read('test_collection_weak.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
+  end
+  def test_multipvalue_preprocessing
+    Helpers.run_preprocess_collection('test_collection test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
+    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
+  end
+  def test_preprocessing_collection_from_a_single_file
+    Helpers.run_preprocess_collection('test_collection_single_file.txt test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
+    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
+  end
+  def test_preprocessing_collection_from_stdin
+    Helpers.provide_stdin('test_collection/GABPA_f1.pwm  test_collection/KLF4_f2.pwm  test_collection/SP1_f1.pwm'){
+      Helpers.run_preprocess_collection('.stdin test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
+    }
+    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
+  end
+  def test_preprocessing_folder_pcm
+    Helpers.run_preprocess_collection('test_collection_pcm test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
+    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
+  end
+  def test_preprocessing_collection_from_a_single_file_pcm
+    Helpers.run_preprocess_collection('test_collection_single_file_pcm.txt test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
+    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
+  end
+  def test_preprocessing_collection_from_stdin_pcm
+    Helpers.provide_stdin('test_collection_pcm/GABPA_f1.pcm  test_collection_pcm/KLF4_f2.pcm  test_collection_pcm/SP1_f1.pcm'){
+      Helpers.run_preprocess_collection('.stdin test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
+    }
+    assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
+  end
+end

data/test/scan_collection_test.rb CHANGED

@@ -1,48 +1,42 @@
-require_relative 'test_helper'
-class TestScanCollection < Test::Unit::TestCase
-  def setup
-    @start_dir = Dir.pwd
-    Dir.chdir File.join(File.dirname(__FILE__), 'data')
-  end
-  def teardown
-    Dir.chdir(@start_dir)
-  end
-  def test_scan_pcm
-    assert_equal File.read('KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
-                 Helpers.scan_collection_output('KLF4_f2.pcm test_collection.yaml --silent --pcm --boundary lower').gsub("\r\n","\n")
-  end
-  def test_scan_default_cutoff
-    assert_equal File.read('KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
-                 Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --silent --boundary lower').gsub("\r\n","\n")
-  end
-  def test_scan_weak_threshold
-    assert_equal File.read('KLF4_f2_scan_results_weak_threshold.txt').gsub("\r\n", "\n"),
-                 Helpers.scan_collection_output('KLF4_f2.pwm test_collection_weak.yaml --silent').gsub("\r\n","\n")
-  end
-  def test_scan_and_output_all_results
-    assert_equal File.read('KLF4_f2_scan_results_all.txt').gsub("\r\n", "\n"),
-                 Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --all --silent --boundary lower').gsub("\r\n","\n")
-  end
-  def test_scan_precise_mode
-    assert_equal File.read('KLF4_f2_scan_results_precise_mode.txt').gsub("\r\n","\n"),
-                 Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
-  end
-  def test_process_query_pwm_from_stdin
-    assert_equal Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --silent --boundary lower'),
-                Helpers.provide_stdin(File.read('KLF4_f2.pwm')) {
-                  Helpers.scan_collection_output('.stdin test_collection.yaml --silent --boundary lower')
-                }
-  end
-  def test_scan_medium_length_motif
-    assert_match /Query motif medium_motif_name gives 0 recognized words for a given P-value of 0\.0005 with the rough discretization level of 1. Forcing precise discretization level of 10/,
-                 Helpers.scan_collection_stderr('medium_motif.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
-  end
-  def test_scan_short_length_motif
-    assert_match /Query motif short_motif_name gives 0 recognized words for a given P-value of 0\.0005 with the precise discretization level of 10\. It.s impossible to scan collection for this motif/,
-                 Helpers.scan_collection_stderr('short_motif.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
-  end
-end
+require_relative 'test_helper'
+class TestScanCollection < Test::Unit::TestCase
+  def setup
+    @start_dir = Dir.pwd
+    Dir.chdir File.join(File.dirname(__FILE__), 'data')
+  end
+  def teardown
+    Dir.chdir(@start_dir)
+  end
+  def test_scan_pcm
+    assert_equal File.read('KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
+                 Helpers.scan_collection_output('KLF4_f2.pcm test_collection.yaml --silent --pcm --boundary lower').gsub("\r\n","\n")
+  end
+  def test_scan_default_cutoff
+    assert_equal File.read('KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
+                 Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --silent --boundary lower').gsub("\r\n","\n")
+  end
+  def test_scan_weak_threshold
+    assert_equal File.read('KLF4_f2_scan_results_weak_threshold.txt').gsub("\r\n", "\n"),
+                 Helpers.scan_collection_output('KLF4_f2.pwm test_collection_weak.yaml --silent').gsub("\r\n","\n")
+  end
+  def test_scan_and_output_all_results
+    assert_equal File.read('KLF4_f2_scan_results_all.txt').gsub("\r\n", "\n"),
+                 Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --all --silent --boundary lower').gsub("\r\n","\n")
+  end
+  def test_scan_precise_mode
+    assert_equal File.read('KLF4_f2_scan_results_precise_mode.txt').gsub("\r\n","\n"),
+                 Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
+  end
+  def test_scan_medium_length_motif
+    assert_match /Query motif medium_motif_name gives 0 recognized words for a given P-value of 0\.0005 with the rough discretization level of 1. Forcing precise discretization level of 10/,
+                 Helpers.scan_collection_stderr('medium_motif.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
+  end
+  def test_scan_short_length_motif
+    assert_match /Query motif short_motif_name gives 0 recognized words for a given P-value of 0\.0005 with the precise discretization level of 10\. It.s impossible to scan collection for this motif/,
+                 Helpers.scan_collection_stderr('short_motif.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
+  end
+end

data/test/test_helper.rb CHANGED

@@ -1,160 +1,159 @@
-$bioinform_folder = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'bioinform', 'lib'))
-$LOAD_PATH.unshift $bioinform_folder
-require 'test/unit'
-require 'stringio'
-require 'shellwords'
-require_relative '../lib/macroape/cli/find_threshold'
-require_relative '../lib/macroape/cli/find_pvalue'
-require_relative '../lib/macroape/cli/eval_similarity'
-require_relative '../lib/macroape/cli/eval_alignment'
-require_relative '../lib/macroape/cli/preprocess_collection'
-require_relative '../lib/macroape/cli/scan_collection'
-require_relative '../lib/macroape/cli/align_motifs'
-module Helpers
-  # from minitest
-  def self.capture_io(&block)
-    orig_stdout, orig_stderr = $stdout, $stderr
-    captured_stdout, captured_stderr = StringIO.new, StringIO.new
-    $stdout, $stderr = captured_stdout, captured_stderr
-    yield
-    return {stdout: captured_stdout.string, stderr: captured_stderr.string}
-  ensure
-    $stdout = orig_stdout
-    $stderr = orig_stderr
-  end
-  def self.suppress_output(&block)
-    orig_stdout, orig_stderr = $stdout, $stderr
-    captured_stdout, captured_stderr = StringIO.new, StringIO.new
-    $stdout, $stderr = captured_stdout, captured_stderr
-    yield
-  ensure
-    $stdout = orig_stdout
-    $stderr = orig_stderr
-  end
-  # Method stubs $stdin not STDIN !
-  def self.provide_stdin(input, &block)
-    orig_stdin = $stdin
-    $stdin = StringIO.new(input)
-    yield
-  ensure
-    $stdin = orig_stdin
-  end
-  def self.capture_output(&block)
-    capture_io(&block)[:stdout]
-  end
-  def self.capture_stderr(&block)
-    capture_io(&block)[:stderr]
-  end
-  # aaa\tbbb\nccc\tddd  ==>  [['aaa','bbb'],['ccc','ddd']]
-  def self.split_on_lines(str)
-    str.lines.map{|line| line.strip.split("\t")}
-  end
-  def self.obtain_pvalue_by_threshold(args)
-    find_pvalue_output(args).last.last
-  end
-  def self.exec_cmd(executable, param_list)
-    "ruby -I #{$lib_folder} #{$lib_folder}/../bin/#{executable} #{param_list}"
-  end
-  def self.find_threshold_output(param_list)
-    capture_output{ Macroape::CLI::FindThreshold.main(param_list.shellsplit) }
-  end
-  def self.align_motifs_output(param_list)
-    split_on_lines( capture_output{ Macroape::CLI::AlignMotifs.main(param_list.shellsplit)} )
-  end
-  def self.find_pvalue_output(param_list)
-    capture_output{ Macroape::CLI::FindPValue.main(param_list.shellsplit)} .lines.to_a.map(&:strip).reject{|line| line.start_with? '#' }.reject(&:empty?).map{|line|line.split("\t")}
-  end
-  def self.eval_similarity_output(param_list)
-    capture_output{ Macroape::CLI::EvalSimilarity.main(param_list.shellsplit)}
-  end
-  def self.eval_alignment_output(param_list)
-    capture_output{ Macroape::CLI::EvalAlignment.main(param_list.shellsplit)}
-  end
-  def self.scan_collection_output(param_list)
-    capture_output{ Macroape::CLI::ScanCollection.main(param_list.shellsplit) }.lines.to_a.map(&:strip).reject{|line| line.start_with? '#' }.reject(&:empty?).join("\n")
-  end
-  def self.scan_collection_stderr(param_list)
-    capture_stderr{ Macroape::CLI::ScanCollection.main(param_list.shellsplit) }
-  end
-  def self.run_preprocess_collection(param_list)
-    suppress_output{ Macroape::CLI::PreprocessCollection.main(param_list.shellsplit) }
-  end
-  def parse_similarity_infos_string(info_string)
-    infos = {}
-    info_string.lines.map(&:strip).reject{|line| line.start_with?('#')}.reject(&:empty?).each do |line|
-      key, value = line.split
-      case key
-        when 'S'  then infos[:similarity] = value
-        when 'D'  then infos[:distance] = value
-        when 'L'  then infos[:length] = value
-        when 'SH'  then infos[:shift] = value
-        when 'OR'  then infos[:orientation] = value
-        when 'W'  then infos[:words_recognized_by_both] = value
-        when 'W1' then infos[:words_recognized_by_first] = value
-        when 'P1' then infos[:pvalue_recognized_by_first] = value
-        when 'T1' then infos[:threshold_first] = value
-        when 'W2' then infos[:words_recognized_by_second] = value
-        when 'P2' then infos[:pvalue_recognized_by_second] = value
-        when 'T2' then infos[:threshold_second] = value
-        when 'A1'  then infos[:matrix_first_alignment] = value
-        when 'A2'  then infos[:matrix_second_alignment] = value
-        when 'V' then infos[:discretization] = value
-      end
-    end
-    infos
-  end
-  def assert_similarity_info_output(expected_info, info_string)
-    infos = parse_similarity_infos_string(info_string)
-    expected_info.each do |key, value|
-      assert_equal value.to_s, infos[key]
-    end
-  end
-  def parse_threshold_infos_string(infos_string)
-    infos = []
-    infos_string.lines.map(&:strip).reject{|line| line.start_with?('#')}.reject(&:empty?).each do |line|
-      info_data = line.split
-      if info_data.size == 4
-        requested_pvalue, real_pvalue, number_of_recognized_words, threshold = info_data
-        info = {requested_pvalue: requested_pvalue,
-                real_pvalue: real_pvalue,
-                number_of_recognized_words: number_of_recognized_words,
-                threshold: threshold }
-      elsif info_data.size == 3
-        requested_pvalue, real_pvalue, threshold = info_data
-        info = {requested_pvalue: requested_pvalue,
-                real_pvalue: real_pvalue,
-                threshold: threshold }
-      else
-        raise 'can\'t parse threshold infos table'
-      end
-      infos << info
-    end
-    infos
-  end
-  def assert_threshold_info_output(*expected_infos, info_string)
-    infos = parse_threshold_infos_string(info_string)
-    expected_infos.zip(infos).each do |expected_info, info|
-      assert_not_nil info
-      expected_info.each do |key, value|
-        assert_equal value.to_s, info[key]
-      end
-    end
-  end
-end
+$bioinform_folder = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'bioinform', 'lib'))
+$LOAD_PATH.unshift $bioinform_folder
+require 'test/unit'
+require 'stringio'
+require 'shellwords'
+require_relative '../lib/macroape/cli/find_threshold'
+require_relative '../lib/macroape/cli/find_pvalue'
+require_relative '../lib/macroape/cli/eval_similarity'
+require_relative '../lib/macroape/cli/eval_alignment'
+require_relative '../lib/macroape/cli/preprocess_collection'
+require_relative '../lib/macroape/cli/scan_collection'
+require_relative '../lib/macroape/cli/align_motifs'
+# Shared helpers for the CLI tests: stream capturing, in-process runners for
+# each Macroape::CLI command, and parsers/assertions for their textual output.
+# The module-level (self.) methods are called as Helpers.xxx. The instance
+# methods at the bottom call assert_equal / assert_not_nil, which this module
+# does not define, so they are meant to be mixed into a test case.
+module Helpers
+  # Row label printed by the similarity/alignment tools => key used in the
+  # hashes returned by parse_similarity_infos_string.
+  SIMILARITY_INFO_KEYS = {
+    'S'  => :similarity,
+    'D'  => :distance,
+    'L'  => :length,
+    'SH' => :shift,
+    'OR' => :orientation,
+    'W'  => :words_recognized_by_both,
+    'W1' => :words_recognized_by_first,
+    'P1' => :pvalue_recognized_by_first,
+    'T1' => :threshold_first,
+    'W2' => :words_recognized_by_second,
+    'P2' => :pvalue_recognized_by_second,
+    'T2' => :threshold_second,
+    'A1' => :matrix_first_alignment,
+    'A2' => :matrix_second_alignment,
+    'V'  => :discretization
+  }.freeze
+  # Column names of a threshold table row, selected by the number of columns.
+  THRESHOLD_INFO_COLUMNS = {
+    4 => [:requested_pvalue, :real_pvalue, :number_of_recognized_words, :threshold].freeze,
+    3 => [:requested_pvalue, :real_pvalue, :threshold].freeze
+  }.freeze
+  # from minitest
+  # Runs the block with $stdout and $stderr swapped for StringIO buffers and
+  # returns {stdout: String, stderr: String}. The original streams are
+  # restored in `ensure`, i.e. also when the block raises.
+  def self.capture_io(&block)
+    orig_stdout, orig_stderr = $stdout, $stderr
+    captured_stdout, captured_stderr = StringIO.new, StringIO.new
+    $stdout, $stderr = captured_stdout, captured_stderr
+    yield
+    {stdout: captured_stdout.string, stderr: captured_stderr.string}
+  ensure
+    $stdout = orig_stdout
+    $stderr = orig_stderr
+  end
+  # Runs the block with $stdout and $stderr redirected to throwaway buffers.
+  # Unlike capture_io, the block's own result is returned.
+  def self.suppress_output(&block)
+    orig_stdout, orig_stderr = $stdout, $stderr
+    $stdout, $stderr = StringIO.new, StringIO.new
+    yield
+  ensure
+    $stdout = orig_stdout
+    $stderr = orig_stderr
+  end
+  # Runs the block with $stdin replaced by a StringIO over `input` and returns
+  # the block's result.
+  # Method stubs $stdin not STDIN !
+  def self.provide_stdin(input, &block)
+    orig_stdin = $stdin
+    $stdin = StringIO.new(input)
+    yield
+  ensure
+    $stdin = orig_stdin
+  end
+  # Returns only what the block wrote to $stdout.
+  def self.capture_output(&block)
+    capture_io(&block)[:stdout]
+  end
+  # Returns only what the block wrote to $stderr.
+  def self.capture_stderr(&block)
+    capture_io(&block)[:stderr]
+  end
+  # aaa\tbbb\nccc\tddd  ==>  [['aaa','bbb'],['ccc','ddd']]
+  def self.split_on_lines(str)
+    str.lines.map { |line| line.strip.split("\t") }
+  end
+  # Returns the stripped lines of `text`, without '#' comment lines and
+  # without blank lines. Shared by every parser/runner below that previously
+  # repeated this filter inline.
+  def self.meaningful_lines(text)
+    text.lines.map(&:strip).reject { |line| line.empty? || line.start_with?('#') }
+  end
+  # Last field of the last data row of the find_pvalue output for `args`
+  # (presumably the P-value column -- confirm against the CLI's output format).
+  def self.obtain_pvalue_by_threshold(args)
+    find_pvalue_output(args).last.last
+  end
+  # Builds (does not run) the shell command that launches bin/<executable>
+  # with the library folder on the load path.
+  # $lib_folder is used when some other file has set it; it is not assigned
+  # in this file, so fall back to the same ../lib directory the
+  # require_relative lines point at instead of interpolating nil.
+  def self.exec_cmd(executable, param_list)
+    lib_folder = $lib_folder || File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
+    "ruby -I #{lib_folder} #{lib_folder}/../bin/#{executable} #{param_list}"
+  end
+  # The *_output runners below call the corresponding CLI `main` in-process
+  # with `param_list` split like a shell command line, and return its stdout.
+  #
+  # Raw stdout of FindThreshold.
+  def self.find_threshold_output(param_list)
+    capture_output { Macroape::CLI::FindThreshold.main(param_list.shellsplit) }
+  end
+  # Stdout of AlignMotifs as rows of tab-separated fields.
+  def self.align_motifs_output(param_list)
+    split_on_lines(capture_output { Macroape::CLI::AlignMotifs.main(param_list.shellsplit) })
+  end
+  # Stdout of FindPValue as rows of tab-separated fields, comments and blank
+  # lines removed.
+  def self.find_pvalue_output(param_list)
+    output = capture_output { Macroape::CLI::FindPValue.main(param_list.shellsplit) }
+    meaningful_lines(output).map { |line| line.split("\t") }
+  end
+  # Raw stdout of EvalSimilarity.
+  def self.eval_similarity_output(param_list)
+    capture_output { Macroape::CLI::EvalSimilarity.main(param_list.shellsplit) }
+  end
+  # Raw stdout of EvalAlignment.
+  def self.eval_alignment_output(param_list)
+    capture_output { Macroape::CLI::EvalAlignment.main(param_list.shellsplit) }
+  end
+  # Stdout of ScanCollection with comments and blank lines removed, re-joined
+  # with "\n" (each remaining line is stripped).
+  def self.scan_collection_output(param_list)
+    output = capture_output { Macroape::CLI::ScanCollection.main(param_list.shellsplit) }
+    meaningful_lines(output).join("\n")
+  end
+  # What ScanCollection wrote to stderr.
+  def self.scan_collection_stderr(param_list)
+    capture_stderr { Macroape::CLI::ScanCollection.main(param_list.shellsplit) }
+  end
+  # Runs PreprocessCollection with its stdout/stderr discarded.
+  def self.run_preprocess_collection(param_list)
+    suppress_output { Macroape::CLI::PreprocessCollection.main(param_list.shellsplit) }
+  end
+  # Parses "LABEL value" rows into a hash keyed per SIMILARITY_INFO_KEYS.
+  # Values stay strings; rows with an unknown label, comments and blank lines
+  # are ignored.
+  def parse_similarity_infos_string(info_string)
+    Helpers.meaningful_lines(info_string).each_with_object({}) do |line, infos|
+      label, value = line.split
+      key = SIMILARITY_INFO_KEYS[label]
+      infos[key] = value if key
+    end
+  end
+  # Asserts that every expected key appears in the parsed output with the
+  # same value (compared as strings). Keys absent from expected_info are not
+  # checked.
+  def assert_similarity_info_output(expected_info, info_string)
+    infos = parse_similarity_infos_string(info_string)
+    expected_info.each do |key, value|
+      assert_equal value.to_s, infos[key]
+    end
+  end
+  # Parses a whitespace-separated threshold table into an array of hashes,
+  # one per data row. Rows have either 4 columns (with the recognized-words
+  # count) or 3 columns (without it); values stay strings.
+  # Raises RuntimeError for any other column count.
+  def parse_threshold_infos_string(infos_string)
+    Helpers.meaningful_lines(infos_string).map do |line|
+      info_data = line.split
+      columns = THRESHOLD_INFO_COLUMNS[info_data.size]
+      raise 'can\'t parse threshold infos table' unless columns
+      Hash[columns.zip(info_data)]
+    end
+  end
+  # Asserts that the i-th expected hash matches the i-th parsed row on every
+  # key it lists (values compared as strings). The last argument is the raw
+  # output; all preceding arguments are expected rows. Rows in the output
+  # beyond the expected ones are not checked.
+  def assert_threshold_info_output(*expected_infos, info_string)
+    infos = parse_threshold_infos_string(info_string)
+    expected_infos.zip(infos).each do |expected_info, info|
+      assert_not_nil info
+      expected_info.each do |key, value|
+        assert_equal value.to_s, info[key]
+      end
+    end
+  end
+end