RubyGems - macroape - Versions diffs - 3.3.7 → 3.3.8 - Mend

macroape 3.3.7 → 3.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

data/README.md +2 -2
data/Rakefile.rb +6 -6
data/TODO.txt +23 -3
data/benchmark/similarity_benchmark.rb +18 -18
data/lib/macroape/aligned_pair_intersection.rb +4 -4
data/lib/macroape/cli/align_motifs.rb +34 -28
data/lib/macroape/cli/eval_alignment.rb +73 -47
data/lib/macroape/cli/eval_similarity.rb +65 -40
data/lib/macroape/cli/find_pvalue.rb +30 -34
data/lib/macroape/cli/find_threshold.rb +52 -41
data/lib/macroape/cli/preprocess_collection.rb +68 -58
data/lib/macroape/cli/scan_collection.rb +89 -73
data/lib/macroape/cli.rb +184 -1
data/lib/macroape/counting.rb +31 -5
data/lib/macroape/pwm_compare.rb +8 -2
data/lib/macroape/pwm_compare_aligned.rb +15 -10
data/lib/macroape/version.rb +2 -1
data/macroape.gemspec +2 -1
data/spec/count_distribution_spec.rb +11 -11
data/test/align_motifs_test.rb +16 -4
data/test/data/{AHR_si.pat → AHR_si.pwm} +0 -0
data/test/data/{KLF3_f1.pat → KLF3_f1.pwm} +0 -0
data/test/data/{KLF4_f2.pat → KLF4_f2.pwm} +0 -0
data/test/data/KLF4_f2_scan_results_all.txt +1 -2
data/test/data/KLF4_f2_scan_results_default_cutoff.txt +1 -2
data/test/data/KLF4_f2_scan_results_precise_mode.txt +1 -2
data/test/data/KLF4_f2_scan_results_weak_threshold.txt +2 -0
data/test/data/{SP1_f1.pat → SP1_f1.pwm} +0 -0
data/test/data/{SP1_f1_revcomp.pat → SP1_f1_revcomp.pwm} +0 -0
data/test/data/collection_pcm_without_thresholds.yaml +186 -183
data/test/data/collection_without_thresholds.yaml +186 -183
data/test/data/{medium_motif.pat → medium_motif.pwm} +0 -0
data/test/data/{short_motif.pat → short_motif.pwm} +0 -0
data/test/data/test_collection/{GABPA_f1.pat → GABPA_f1.pwm} +0 -0
data/test/data/test_collection/{KLF4_f2.pat → KLF4_f2.pwm} +0 -0
data/test/data/test_collection/{SP1_f1.pat → SP1_f1.pwm} +0 -0
data/test/data/test_collection.yaml +179 -176
data/test/data/test_collection_weak.yaml +214 -0
data/test/eval_alignment_test.rb +97 -21
data/test/eval_similarity_test.rb +104 -26
data/test/find_pvalue_test.rb +22 -9
data/test/find_threshold_test.rb +76 -25
data/test/preprocess_collection_test.rb +16 -21
data/test/scan_collection_test.rb +26 -14
data/test/test_helper.rb +96 -12
metadata +44 -24

data/test/eval_alignment_test.rb CHANGED Viewed

@@ -1,35 +1,111 @@
 require_relative 'test_helper'
 class TestEvalAlignment < Test::Unit::TestCase
-  def test_process_pcm_files
-    assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_alignment_output('test/data/KLF4_f2.pcm test/data/SP1_f1.pcm -1 direct --pcm')
+  include Helpers
+  def setup
+    @start_dir = Dir.pwd
+    Dir.chdir File.join(File.dirname(__FILE__), 'data')
+  end
+  def teardown
+    Dir.chdir(@start_dir)
   end
-  def test_process_at_optimal_alignment
-    assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_alignment_output('test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 direct')
+  def test_process_weak_threshold
+    assert_similarity_info_output({similarity: 0.24382446963092125,
+                                  distance: 0.7561755303690787,
+                                  length: 11,
+                                  shift: -1,
+                                  orientation: 'direct',
+                                  words_recognized_by_both: 839.0,
+                                  threshold_first: 5.8,
+                                  words_recognized_by_first: 2104.0,
+                                  pvalue_recognized_by_first: 0.0005016326904296875,
+                                  threshold_second: 5.6,
+                                  words_recognized_by_second: 2176.0,
+                                  pvalue_recognized_by_second: 0.000518798828125,
+                                  matrix_first_alignment:  '.>>>>>>>>>>',
+                                  matrix_second_alignment: '>>>>>>>>>>>' #, discretization: 10.0
+                                  },
+                                  Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm -1 direct'))
+  end
+  def test_process_strong_threshold
+    assert_similarity_info_output({similarity: 0.2420758234928527,
+                                  distance: 0.7579241765071473,
+                                  length: 11,
+                                  shift: -1,
+                                  orientation: 'direct',
+                                  words_recognized_by_both: 779.0,
+                                  threshold_first: 5.8100000000000005,
+                                  words_recognized_by_first: 1964.0,
+                                  pvalue_recognized_by_first: 0.00046825408935546875,
+                                  threshold_second: 5.61,
+                                  words_recognized_by_second: 2033.0,
+                                  pvalue_recognized_by_second: 0.00048470497131347656,
+                                  matrix_first_alignment:  '.>>>>>>>>>>',
+                                  matrix_second_alignment: '>>>>>>>>>>>' #, discretization: 10.0
+                                  },
+                                  Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm -1 direct --boundary lower'))
+  end
+  def test_process_custom_thresholds
+    assert_similarity_info_output({similarity: 0.28505023241865346,
+                                   words_recognized_by_both: 1901.0,
+                                   words_recognized_by_first: 4348.0,
+                                   words_recognized_by_second: 4222.0,
+#                                   threshold_first: 4.7,
+#                                   threshold_second: 4.6
+                                   },
+                                   Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm -1 direct --first-threshold 4.7 --second-threshold 4.6'))
   end
   def test_process_not_optimal_alignment
-    assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n", Helpers.eval_alignment_output('test/data/KLF4_f2.pat test/data/SP1_f1.pat 0 direct')
+    assert_similarity_info_output({similarity: 0.004517983923018248,
+                                  length: 12,
+                                  words_recognized_by_both: 77.0,
+                                  words_recognized_by_first: 8416.0,
+                                  words_recognized_by_second: 8704.0,
+                                  matrix_first_alignment:  '>>>>>>>>>>..',
+                                  matrix_second_alignment: '.>>>>>>>>>>>',
+                                  shift: 1,
+                                  orientation: 'direct'},
+                                  Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm 1 direct'))
+  end
+  def test_process_at_optimal_alignment_reversed
+    assert_similarity_info_output({similarity: 0.0,
+                                  words_recognized_by_both: 0.0,
+                                  length: 11,
+                                  matrix_first_alignment: '.>>>>>>>>>>',
+                                  matrix_second_alignment:'<<<<<<<<<<<',
+                                  shift: -1,
+                                  orientation: 'revcomp'},
+                                  Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm -1 revcomp'))
   end
+  def test_process_pcm_files
+    assert_equal( Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm -1 direct'),
+                  Helpers.eval_alignment_output('KLF4_f2.pcm SP1_f1.pcm -1 direct --pcm'))
+  end
   def test_process_alignment_first_motif_from_stdin
-    assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n",
-      Helpers.provide_stdin(File.read('test/data/KLF4_f2.pat')) {
-        Helpers.eval_alignment_output('.stdin test/data/SP1_f1.pat 0 direct')
-      }
+    result = Helpers.provide_stdin(File.read('KLF4_f2.pwm')) {
+      Helpers.eval_alignment_output('.stdin SP1_f1.pwm 0 direct') }
+    assert_equal( Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm 0 direct'),
+                  result )
   end
   def test_process_alignment_second_motif_from_stdin
-    assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n",
-      Helpers.provide_stdin(File.read('test/data/SP1_f1.pat')) {
-        Helpers.eval_alignment_output('test/data/KLF4_f2.pat .stdin 0 direct')
-      }
+    result = Helpers.provide_stdin(File.read('SP1_f1.pwm')) {
+      Helpers.eval_alignment_output('KLF4_f2.pwm .stdin 0 direct') }
+    assert_equal( Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm 0 direct'),
+                  result )
   end
   def test_process_alignment_both_motifs_from_stdin
-    assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n",
-      Helpers.provide_stdin(File.read('test/data/KLF4_f2.pat') + File.read('test/data/SP1_f1.pat')) {
-        Helpers.eval_alignment_output('.stdin .stdin 0 direct')
-      }
-  end
-  def test_process_at_optimal_alignment_reversed
-    assert_equal "0.0\n0.0\t11\n.>>>>>>>>>>\n<<<<<<<<<<<\n-1\trevcomp\n", Helpers.eval_alignment_output('test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 revcomp')
+    result = Helpers.provide_stdin(File.read('KLF4_f2.pwm') + File.read('SP1_f1.pwm')) {
+      Helpers.eval_alignment_output('.stdin .stdin 0 direct') }
+    assert_equal( Helpers.eval_alignment_output('KLF4_f2.pwm SP1_f1.pwm 0 direct'),
+                  result )
   end
-end
+end

data/test/eval_similarity_test.rb CHANGED Viewed

@@ -1,45 +1,123 @@
 require_relative 'test_helper'
 class TestEvalSimilarity < Test::Unit::TestCase
-  def test_process_pair_of_pcms
-    assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_similarity_output('test/data/KLF4_f2.pcm test/data/SP1_f1.pcm --pcm')
+  include Helpers
+  def setup
+    @start_dir = Dir.pwd
+    Dir.chdir File.join(File.dirname(__FILE__), 'data')
   end
-  def test_process_pair_of_pwms
-    assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", Helpers.eval_similarity_output('test/data/KLF4_f2.pat test/data/SP1_f1.pat')
+  def teardown
+    Dir.chdir(@start_dir)
   end
-  def test_process_another_pair_of_pwms
-    assert_equal "0.0037332005973120955\n15.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>.\n1\tdirect\n", Helpers.eval_similarity_output('test/data/SP1_f1.pat test/data/AHR_si.pat')
+  def test_process_strong_thresholds
+    assert_similarity_info_output({similarity: 0.2420758234928527,
+                                  distance: 0.7579241765071473,
+                                  length: 11,
+                                  shift: -1,
+                                  orientation: 'direct',
+                                  words_recognized_by_both: 779.0,
+                                  threshold_first: 5.8100000000000005,
+                                  words_recognized_by_first: 1964.0,
+                                  pvalue_recognized_by_first: 0.00046825408935546875,
+                                  threshold_second: 5.61,
+                                  words_recognized_by_second: 2033.0,
+                                  pvalue_recognized_by_second: 0.00048470497131347656,
+                                  matrix_first_alignment:  '.>>>>>>>>>>',
+                                  matrix_second_alignment: '>>>>>>>>>>>' #, discretization: 10.0
+                                  },
+                                  Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm --boundary lower'))
+  end
+  def test_process_weak_thresholds
+    assert_similarity_info_output({similarity: 0.24382446963092125,
+                                  distance: 0.7561755303690787,
+                                  length: 11,
+                                  shift: -1,
+                                  orientation: 'direct',
+                                  words_recognized_by_both: 839.0,
+                                  threshold_first: 5.8,
+                                  words_recognized_by_first: 2104.0,
+                                  pvalue_recognized_by_first: 0.0005016326904296875,
+                                  threshold_second: 5.6,
+                                  words_recognized_by_second: 2176.0,
+                                  pvalue_recognized_by_second: 0.000518798828125,
+                                  matrix_first_alignment:  '.>>>>>>>>>>',
+                                  matrix_second_alignment: '>>>>>>>>>>>' #, discretization: 10.0
+                                  },
+                                  Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm'))
+  end
+  def test_process_custom_threshold
+    assert_similarity_info_output({similarity: 0.28505023241865346,
+                                  words_recognized_by_both: 1901.0,
+                                  words_recognized_by_first: 4348.0,
+                                  words_recognized_by_second: 4222.0,
+#                                  threshold_first: 4.7,
+#                                  threshold_second: 4.6
+                                  },
+                                  Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm --first-threshold 4.7 --second-threshold 4.6'))
   end
+  def test_process_dissimilar_pair_of_pwms
+    assert_similarity_info_output({similarity: 0.0037332005973120955,
+                                  words_recognized_by_both: 15.0,
+                                  words_recognized_by_first: 2033.0,
+                                  words_recognized_by_second: 2000.0,
+                                  length: 11,
+                                  matrix_first_alignment:  '>>>>>>>>>>>',
+                                  matrix_second_alignment: '.>>>>>>>>>.',
+                                  shift: 1,
+                                  orientation: 'direct'},
+                                  Helpers.eval_similarity_output('SP1_f1.pwm AHR_si.pwm --boundary lower'))
+  end
   def test_recognize_orientation_of_alignment
-    assert_equal "1.0\n2033.0\t11\n>>>>>>>>>>>\n<<<<<<<<<<<\n0\trevcomp\n", Helpers.eval_similarity_output('test/data/SP1_f1_revcomp.pat test/data/SP1_f1.pat')
+    assert_similarity_info_output({similarity: 1.0,
+                                  words_recognized_by_both: 2176.0,
+                                  words_recognized_by_first: 2176.0,
+                                  words_recognized_by_second: 2176.0,
+                                  length: 11,
+                                  matrix_first_alignment:  '>>>>>>>>>>>',
+                                  matrix_second_alignment: '<<<<<<<<<<<',
+                                  shift: 0,
+                                  orientation: 'revcomp'},
+                                  Helpers.eval_similarity_output('SP1_f1_revcomp.pwm SP1_f1.pwm'))
   end
   def test_process_custom_discretization
-    assert_equal "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n", Helpers.eval_similarity_output('test/data/SP1_f1.pat test/data/KLF4_f2.pat -d 1')
+    assert_similarity_info_output({similarity: 0.2580456407255705,
+                                  words_recognized_by_both: 1323.0,
+                                  words_recognized_by_first: 3554.0,
+                                  words_recognized_by_second: 2896.0,
+                                  length: 11,
+                                  matrix_first_alignment:  '>>>>>>>>>>>',
+                                  matrix_second_alignment: '.>>>>>>>>>>',
+                                  shift: 1,
+                                  orientation: 'direct' #, discretization: 1.0
+                                  },
+                                  Helpers.eval_similarity_output('SP1_f1.pwm KLF4_f2.pwm -d 1'))
   end
+  def test_process_pcm_files
+    assert_equal( Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm'),
+                  Helpers.eval_similarity_output('KLF4_f2.pcm SP1_f1.pcm --pcm'))
+  end
   def test_process_first_motif_from_stdin
-    assert_equal "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n",
-      Helpers.provide_stdin(File.read('test/data/SP1_f1.pat')){
-        Helpers.eval_similarity_output('.stdin test/data/KLF4_f2.pat -d 1')
-      }
+    result = Helpers.provide_stdin(File.read('KLF4_f2.pwm')){
+      Helpers.eval_similarity_output('.stdin SP1_f1.pwm') }
+    assert_equal(Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm'), result)
   end
   def test_process_second_motif_from_stdin
-    assert_equal "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n",
-      Helpers.provide_stdin(File.read('test/data/KLF4_f2.pat')){
-        Helpers.eval_similarity_output('test/data/SP1_f1.pat .stdin -d 1')
-      }
+    result = Helpers.provide_stdin(File.read('SP1_f1.pwm')){
+      Helpers.eval_similarity_output('KLF4_f2.pwm .stdin') }
+    assert_equal(Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm'), result)
   end
   def test_process_both_motifs_from_stdin
-    assert_equal "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n",
-      Helpers.provide_stdin(File.read('test/data/SP1_f1.pat') + File.read('test/data/KLF4_f2.pat')){
-        Helpers.eval_similarity_output('.stdin .stdin -d 1')
-      }
+    result = Helpers.provide_stdin(File.read('KLF4_f2.pwm') + File.read('SP1_f1.pwm')){
+      Helpers.eval_similarity_output('.stdin .stdin') }
+    assert_equal(Helpers.eval_similarity_output('KLF4_f2.pwm SP1_f1.pwm'), result)
   end
 end

data/test/find_pvalue_test.rb CHANGED Viewed

@@ -1,26 +1,39 @@
 require_relative 'test_helper'
 class FindPvalueTest < Test::Unit::TestCase
+  def setup
+    @start_dir = Dir.pwd
+    Dir.chdir File.join(File.dirname(__FILE__), 'data')
+  end
+  def teardown
+    Dir.chdir(@start_dir)
+  end
   def test_process_pcm
-    assert_equal "4.1719\t1048.0\t0.00099945068359375\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pcm 4.1719 --pcm')
+    assert_equal [%w[4.1719 1048.0 0.00099945068359375]], Helpers.find_pvalue_output('KLF4_f2.pcm 4.1719 --pcm')
   end
   def test_process_one_threshold
-    assert_equal "4.1719\t1048.0\t0.00099945068359375\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pat 4.1719')
+    assert_equal [%w[4.1719 1048.0 0.00099945068359375]], Helpers.find_pvalue_output('KLF4_f2.pwm 4.1719')
   end
   def test_process_several_thresholds
-    assert_equal "4.1719\t1048.0\t0.00099945068359375\n5.2403\t524.0\t0.000499725341796875\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pat 4.1719 5.2403')
+    assert_equal [%w[4.1719 1048.0 0.00099945068359375],
+                  %w[5.2403 524.0 0.000499725341796875]], Helpers.find_pvalue_output('KLF4_f2.pwm 4.1719 5.2403')
   end
   def test_process_several_thresholds_result_is_ordered
-    assert_equal "5.2403\t524.0\t0.000499725341796875\n4.1719\t1048.0\t0.00099945068359375\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pat 5.2403 4.1719')
+    assert_equal [%w[5.2403 524.0 0.000499725341796875],
+                  %w[4.1719 1048.0 0.00099945068359375]], Helpers.find_pvalue_output('KLF4_f2.pwm 5.2403 4.1719')
   end
   def test_custom_discretization
-    assert_equal "5.2403\t527.0\t0.0005025863647460938\n", Helpers.find_pvalue_output('test/data/KLF4_f2.pat 5.2403 -d 100')
+    assert_equal [%w[5.2403 527.0 0.0005025863647460938]], Helpers.find_pvalue_output('KLF4_f2.pwm 5.2403 -d 100')
+  end
+  def test_probability_wise_backgrond
+    assert_equal [%w[5.2403 0.0005025863647460938]], Helpers.find_pvalue_output('KLF4_f2.pwm 5.2403 -d 100 -b 0.25,0.25,0.25,0.25')
+  end
+  def test_custom_background
+    assert_equal [%w[5.2403 6.815000000000001e-06]], Helpers.find_pvalue_output('KLF4_f2.pwm 5.2403 -b 0.4,0.1,0.1,0.4')
   end
   def test_process_pwm_from_stdin
-    assert_equal Helpers.find_pvalue_output('test/data/KLF4_f2.pat 1'),
-                Helpers.provide_stdin(File.read('test/data/KLF4_f2.pat')) {
-                  Helpers.find_pvalue_output('.stdin 1')
-                }
+    assert_equal Helpers.find_pvalue_output('KLF4_f2.pwm 1'),
+                Helpers.provide_stdin(File.read 'KLF4_f2.pwm'){  Helpers.find_pvalue_output('.stdin 1') }
   end
 end

data/test/find_threshold_test.rb CHANGED Viewed

@@ -1,40 +1,91 @@
 require_relative 'test_helper'
 class FindThresholdTest < Test::Unit::TestCase
+  include Helpers
+  def setup
+    @start_dir = Dir.pwd
+    Dir.chdir File.join(File.dirname(__FILE__), 'data')
+  end
+  def teardown
+    Dir.chdir(@start_dir)
+  end
+  def test_process_one_pvalue_weak_thresold
+    assert_threshold_info_output({requested_pvalue: 0.001,
+                                  real_pvalue: 0.0010004043579101562,
+                                  number_of_recognized_words: 1049.0,
+                                  threshold: 4.1718},
+                                  Helpers.find_threshold_output("KLF4_f2.pwm 0.001 --boundary upper") )
+    # additional consistency checks
+    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.1718"), '0.0010004043579101562'
+  end
+  def test_process_one_pvalue_strong_thresold
+    assert_threshold_info_output({requested_pvalue: 0.001,
+                                  real_pvalue: 0.00099945068359375,
+                                  number_of_recognized_words: 1048.0,
+                                  threshold: 4.17189},
+                                  Helpers.find_threshold_output("KLF4_f2.pwm 0.001") )
+    # additional consistency checks
+    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.17189"), '0.00099945068359375'
+  end
   def test_process_several_pvalues
     pvalues = []
-    Helpers.find_threshold_output('test/data/KLF4_f2.pat -p 0.001 0.0005').lines.each{|line|
-      pvalue, threshold, real_pvalue = line.strip.split("\t")
-      pvalues << pvalue
-      assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
-    }
-    assert_equal pvalues, ['0.0005', '0.001']
+    assert_threshold_info_output({requested_pvalue: 0.0005,
+                                  real_pvalue: 0.000499725341796875,
+                                  number_of_recognized_words: 524.0,
+                                  threshold: 5.24071},
+                                  {requested_pvalue: 0.001,
+                                  real_pvalue: 0.00099945068359375,
+                                  number_of_recognized_words: 1048.0,
+                                  threshold: 4.17189},
+                                  Helpers.find_threshold_output('KLF4_f2.pwm 0.001 0.0005') )
+    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 4.17189"), '0.00099945068359375'
+    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 5.24071"), '0.000499725341796875'
   end
   def test_process_pcm
-    pvalue, threshold, real_pvalue = Helpers.find_threshold_output('test/data/KLF4_f2.pcm -p 0.001 --pcm').strip.split("\t")
-    assert_equal '0.001', pvalue
-    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
-  end
-  def test_process_one_pvalue
-    pvalue, threshold, real_pvalue = Helpers.find_threshold_output('test/data/KLF4_f2.pat -p 0.001').strip.split("\t")
-    assert_equal '0.001', pvalue
-    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
+    assert_equal( Helpers.find_threshold_output("KLF4_f2.pwm"),
+                  Helpers.find_threshold_output("KLF4_f2.pcm --pcm"))
   end
   def test_process_default_pvalue
-    pvalue, threshold, real_pvalue = Helpers.find_threshold_output('test/data/KLF4_f2.pat').strip.split("\t")
-    assert_equal '0.0005', pvalue
-    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
+    assert_equal( Helpers.find_threshold_output("KLF4_f2.pwm 0.0005"),
+                  Helpers.find_threshold_output("KLF4_f2.pwm"))
   end
   def test_custom_discretization
-    pvalue, threshold, real_pvalue = Helpers.find_threshold_output('test/data/KLF4_f2.pat -d 100').strip.split("\t")
-    assert_equal '0.0005', pvalue
-    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold} -d 100"), real_pvalue
+    assert_threshold_info_output({requested_pvalue: 0.0005,
+                                  real_pvalue: 0.0004978179931640625,
+                                  number_of_recognized_words: 522.0,
+                                  threshold: 5.281000000000001},
+                                  Helpers.find_threshold_output("KLF4_f2.pwm -d 100") )
+    # additional consistency checks
+    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm 5.281000000000001 -d 100"), '0.0004978179931640625'
+  end
+  def test_custom_background
+    assert_threshold_info_output({requested_pvalue: 0.0005,
+                                  real_pvalue: '0.00049964290000001',
+                                  threshold: '-0.10449000000000001'},
+                                  Helpers.find_threshold_output("KLF4_f2.pwm -b 0.4,0.1,0.1,0.4") )
+    # additional consistency checks
+    assert_equal Helpers.obtain_pvalue_by_threshold("KLF4_f2.pwm -0.10449000000000001 -b 0.4,0.1,0.1,0.4"), '0.0004996429000000166' # here real pvalue differs at last digits =\
   end
   def test_process_pwm_from_stdin
-    assert_equal Helpers.find_threshold_output('test/data/KLF4_f2.pat'),
-                Helpers.provide_stdin(File.read('test/data/KLF4_f2.pat')) {
-                  Helpers.find_threshold_output('.stdin')
-                }
+    assert_equal Helpers.find_threshold_output('KLF4_f2.pwm'),
+                Helpers.provide_stdin(File.read('KLF4_f2.pwm')){ Helpers.find_threshold_output('.stdin') }
   end
-end
+  # TODO: it should be rewritten as a spec for count_distribution_under_pvalue - not to raise an error(log out of domain) and return a value
+  def test_process_large_pvalue
+    assert_nothing_raised do
+      # discretization is set not to take very long time calculation
+      assert_threshold_info_output({requested_pvalue: 0.8,
+                                  real_pvalue: 0.7996518611907959,
+                                  number_of_recognized_words: 3353983.0,
+                                  threshold: -17.89},
+                                  Helpers.find_threshold_output('SP1_f1.pwm 0.8 -d 10') )
+    end
+    assert_equal Helpers.obtain_pvalue_by_threshold("SP1_f1.pwm -17.89 -d 10"), '0.7996518611907959'
+  end
+end

data/test/preprocess_collection_test.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 require_relative 'test_helper'
 require 'yaml'
-# Don't use YAML.load_file() instead of YAML.load(File.read()) because in ruby before v1.93 p194
+# Don't use YAML.load_file() instead of YAML.load(File.read()) because in ruby before v1.9.3 p194
 # it doesn't immediately release file descriptor (if I understood error right way) so File.delete fails
 class TestPreprocessCollection < Test::Unit::TestCase
@@ -11,60 +11,55 @@ class TestPreprocessCollection < Test::Unit::TestCase
   end
   def teardown
     File.delete('test_collection.yaml.tmp')  if File.exist? 'test_collection.yaml.tmp'
+    File.delete('my_collection.yaml')  if File.exist? 'my_collection.yaml'
     Dir.chdir(@start_dir)
   end
+  def test_weak_thresholds
+    Helpers.run_preprocess_collection('test_collection test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent')
+    assert_equal YAML.load(File.read('test_collection_weak.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
+  end
   def test_multipvalue_preprocessing
-    Helpers.run_preprocess_collection('test_collection -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent')
+    Helpers.run_preprocess_collection('test_collection test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
     assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
   end
   def test_preprocessing_collection_from_a_single_file
-    Helpers.run_preprocess_collection('test_collection_single_file.txt -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent')
+    Helpers.run_preprocess_collection('test_collection_single_file.txt test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
     assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
   end
   def test_preprocessing_collection_from_stdin
-    Helpers.provide_stdin('test_collection/GABPA_f1.pat  test_collection/KLF4_f2.pat  test_collection/SP1_f1.pat'){
-      Helpers.run_preprocess_collection('.stdin -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent')
+    Helpers.provide_stdin('test_collection/GABPA_f1.pwm  test_collection/KLF4_f2.pwm  test_collection/SP1_f1.pwm'){
+      Helpers.run_preprocess_collection('.stdin test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
     }
     assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
   end
   def test_preprocessing_folder_pcm
-    Helpers.run_preprocess_collection('test_collection_pcm -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent --pcm')
+    Helpers.run_preprocess_collection('test_collection_pcm test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
     assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
   end
   def test_preprocessing_collection_from_a_single_file_pcm
-    Helpers.run_preprocess_collection('test_collection_single_file_pcm.txt -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent --pcm')
+    Helpers.run_preprocess_collection('test_collection_single_file_pcm.txt test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
     assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
   end
   def test_preprocessing_collection_from_a_collection
-    Helpers.run_preprocess_collection('collection_without_thresholds.yaml -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent')
+    Helpers.run_preprocess_collection('collection_without_thresholds.yaml test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --boundary lower')
     assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
   end
   def test_preprocessing_collection_from_a_pcm_collection
-    Helpers.run_preprocess_collection('collection_pcm_without_thresholds.yaml -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent --pcm')
+    Helpers.run_preprocess_collection('collection_pcm_without_thresholds.yaml test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
     assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
   end
   def test_preprocessing_collection_from_stdin_pcm
     Helpers.provide_stdin('test_collection_pcm/GABPA_f1.pcm  test_collection_pcm/KLF4_f2.pcm  test_collection_pcm/SP1_f1.pcm'){
-      Helpers.run_preprocess_collection('.stdin -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent --pcm')
+      Helpers.run_preprocess_collection('.stdin test_collection.yaml.tmp -p 0.0005,0.0001,0.00005 --silent --pcm --boundary lower')
     }
     assert_equal YAML.load(File.read('test_collection.yaml')), YAML.load(File.read('test_collection.yaml.tmp'))
   end
-  def test_with_name_specified
-    Helpers.run_preprocess_collection('test_collection -n my_collection -p 0.0005 0.0001 0.00005 --silent')
-    assert_equal YAML.load(File.read('test_collection.yaml')).set_parameters(name:'my_collection'), YAML.load(File.read('my_collection.yaml'))
-    File.delete('my_collection.yaml')
-  end
-  def test_with_name_and_output_specified
-    Helpers.run_preprocess_collection('test_collection -n my_collection -o test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent')
-    assert_equal YAML.load(File.read('test_collection.yaml')).set_parameters(name:'my_collection'), YAML.load(File.read('test_collection.yaml.tmp'))
-  end
 end

data/test/scan_collection_test.rb CHANGED Viewed

@@ -1,36 +1,48 @@
 require_relative 'test_helper'
 class TestScanCollection < Test::Unit::TestCase
+  def setup
+    @start_dir = Dir.pwd
+    Dir.chdir File.join(File.dirname(__FILE__), 'data')
+  end
+  def teardown
+    Dir.chdir(@start_dir)
+  end
   def test_scan_pcm
-    assert_equal File.read('test/data/KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
-                 Helpers.scan_collection_output('test/data/KLF4_f2.pcm test/data/test_collection.yaml --silent --pcm').gsub("\r\n","\n")
+    assert_equal File.read('KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
+                 Helpers.scan_collection_output('KLF4_f2.pcm test_collection.yaml --silent --pcm --boundary lower').gsub("\r\n","\n")
   end
   def test_scan_default_cutoff
-    assert_equal File.read('test/data/KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
-                 Helpers.scan_collection_output('test/data/KLF4_f2.pat test/data/test_collection.yaml --silent').gsub("\r\n","\n")
+    assert_equal File.read('KLF4_f2_scan_results_default_cutoff.txt').gsub("\r\n", "\n"),
+                 Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --silent --boundary lower').gsub("\r\n","\n")
+  end
+  def test_scan_weak_threshold
+    assert_equal File.read('KLF4_f2_scan_results_weak_threshold.txt').gsub("\r\n", "\n"),
+                 Helpers.scan_collection_output('KLF4_f2.pwm test_collection_weak.yaml --silent').gsub("\r\n","\n")
   end
   def test_scan_and_output_all_results
-    assert_equal File.read('test/data/KLF4_f2_scan_results_all.txt').gsub("\r\n", "\n"),
-                 Helpers.scan_collection_output('test/data/KLF4_f2.pat test/data/test_collection.yaml --all --silent').gsub("\r\n","\n")
+    assert_equal File.read('KLF4_f2_scan_results_all.txt').gsub("\r\n", "\n"),
+                 Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --all --silent --boundary lower').gsub("\r\n","\n")
   end
   def test_scan_precise_mode
-    assert_equal File.read('test/data/KLF4_f2_scan_results_precise_mode.txt').gsub("\r\n","\n"),
-                 Helpers.scan_collection_output('test/data/KLF4_f2.pat test/data/test_collection.yaml --precise --all --silent').gsub("\r\n", "\n")
+    assert_equal File.read('KLF4_f2_scan_results_precise_mode.txt').gsub("\r\n","\n"),
+                 Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
   end
   def test_process_query_pwm_from_stdin
-    assert_equal Helpers.scan_collection_output('test/data/KLF4_f2.pat test/data/test_collection.yaml --silent'),
-                Helpers.provide_stdin(File.read('test/data/KLF4_f2.pat')) {
-                  Helpers.scan_collection_output('.stdin test/data/test_collection.yaml --silent')
+    assert_equal Helpers.scan_collection_output('KLF4_f2.pwm test_collection.yaml --silent --boundary lower'),
+                Helpers.provide_stdin(File.read('KLF4_f2.pwm')) {
+                  Helpers.scan_collection_output('.stdin test_collection.yaml --silent --boundary lower')
                 }
   end
   def test_scan_medium_length_motif
     assert_match /Query motif medium_motif_name gives 0 recognized words for a given P-value of 0\.0005 with the rough discretization level of 1. Forcing precise discretization level of 10/,
-                 Helpers.scan_collection_stderr('test/data/medium_motif.pat test/data/test_collection.yaml --precise --all --silent').gsub("\r\n", "\n")
+                 Helpers.scan_collection_stderr('medium_motif.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
   end
   def test_scan_short_length_motif
     assert_match /Query motif short_motif_name gives 0 recognized words for a given P-value of 0\.0005 with the precise discretization level of 10\. It.s impossible to scan collection for this motif/,
-                 Helpers.scan_collection_stderr('test/data/short_motif.pat test/data/test_collection.yaml --precise --all --silent').gsub("\r\n", "\n")
+                 Helpers.scan_collection_stderr('short_motif.pwm test_collection.yaml --precise --all --silent --boundary lower').gsub("\r\n", "\n")
   end
 end