RubyGems - macroape - Versions diffs - 3.2.2 → 3.3.0 - Mend

macroape 3.2.2 → 3.3.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registry.

Files changed (37)

data/Rakefile +28 -7
data/lib/macroape.rb +14 -26
data/lib/macroape/aligned_pair_intersection.rb +24 -24
data/lib/macroape/collection.rb +1 -2
data/lib/macroape/count_by_threshold.rb +8 -26
data/lib/macroape/exec/eval_alignment.rb +19 -19
data/lib/macroape/exec/eval_similarity.rb +18 -16
data/lib/macroape/exec/find_pvalue.rb +8 -6
data/lib/macroape/exec/find_threshold.rb +7 -5
data/lib/macroape/exec/preprocess_collection.rb +10 -7
data/lib/macroape/exec/scan_collection.rb +13 -10
data/lib/macroape/pwm_compare.rb +33 -2
data/lib/macroape/pwm_compare_aligned.rb +38 -2
data/lib/macroape/threshold_by_pvalue.rb +48 -43
data/lib/macroape/version.rb +3 -3
data/macroape.gemspec +2 -0
data/test/data/test_collection.yaml +70 -4
data/test/eval_alignment_similarity_test.rb +19 -0
data/test/eval_similarity_test.rb +26 -0
data/test/find_pvalue_test.rb +25 -0
data/test/find_threshold_test.rb +29 -0
data/test/preprocess_collection_test.rb +9 -0
data/test/scan_collection_test.rb +17 -0
data/test/test_helper.rb +10 -0
metadata +33 -16
data/lib/macroape/aligned_pair_metrics.rb +0 -24
data/lib/macroape/aligned_pair_transformations.rb +0 -23
data/lib/macroape/extract_pwm.rb +0 -32
data/lib/macroape/gauss_estimation.rb +0 -30
data/lib/macroape/matrix_information.rb +0 -29
data/lib/macroape/matrix_on_background.rb +0 -16
data/lib/macroape/matrix_transformations.rb +0 -29
data/lib/macroape/pair_metrics.rb +0 -9
data/lib/macroape/pair_transformations.rb +0 -28
data/lib/macroape/single_matrix.rb +0 -45
data/lib/macroape/support.rb +0 -34
data/test/macroape_test.rb +0 -125

data/test/eval_alignment_similarity_test.rb ADDED Viewed

@@ -0,0 +1,19 @@
+require 'test_helper'
+class TestEvalAlignmentSimilarity < Test::Unit::TestCase
+  def test_process_at_optimal_alignment
+    IO.popen(Helpers.exec_cmd('eval_alignment','test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 direct')){|f|
+      assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", f.read
+    }
+  end
+  def test_process_not_optimal_alignment
+    IO.popen(Helpers.exec_cmd('eval_alignment','test/data/KLF4_f2.pat test/data/SP1_f1.pat 0 direct')){|f|
+      assert_equal "0.0017543859649122807\n7.0\t11\n>>>>>>>>>>.\n>>>>>>>>>>>\n0\tdirect\n", f.read
+    }
+  end
+  def test_process_at_optimal_alignment_reversed
+    IO.popen(Helpers.exec_cmd('eval_alignment','test/data/KLF4_f2.pat test/data/SP1_f1.pat -1 revcomp')){|f|
+      assert_equal "0.0\n0.0\t11\n.>>>>>>>>>>\n<<<<<<<<<<<\n-1\trevcomp\n", f.read
+    }
+  end
+end

data/test/eval_similarity_test.rb ADDED Viewed

@@ -0,0 +1,26 @@
+require 'test_helper'
+class TestEvalSimilarity < Test::Unit::TestCase
+  def test_process_pair_of_pwms
+    IO.popen(Helpers.exec_cmd('eval_similarity','test/data/KLF4_f2.pat test/data/SP1_f1.pat')){|f|
+      assert_equal "0.2420758234928527\n779.0\t11\n.>>>>>>>>>>\n>>>>>>>>>>>\n-1\tdirect\n", f.read
+    }
+  end
+  def test_process_another_pair_of_pwms
+    IO.popen(Helpers.exec_cmd('eval_similarity','test/data/SP1_f1.pat test/data/AHR_si.pat')){|f|
+      assert_equal "0.0037332005973120955\n15.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>.\n1\tdirect\n", f.read
+    }
+  end
+  def test_recognize_orientation_of_alignment
+    IO.popen(Helpers.exec_cmd('eval_similarity','test/data/SP1_f1_revcomp.pat test/data/SP1_f1.pat')){|f|
+      assert_equal "1.0\n2033.0\t11\n>>>>>>>>>>>\n<<<<<<<<<<<\n0\trevcomp\n", f.read
+    }
+  end
+  def test_process_custom_discretization
+    IO.popen(Helpers.exec_cmd('eval_similarity','test/data/SP1_f1.pat test/data/KLF4_f2.pat -d 1')){|f|
+      assert_equal "0.22754919499105544\n636.0\t11\n>>>>>>>>>>>\n.>>>>>>>>>>\n1\tdirect\n", f.read
+    }
+  end
+end

data/test/find_pvalue_test.rb ADDED Viewed

@@ -0,0 +1,25 @@
+require 'test_helper'
+class FindPvalueTest < Test::Unit::TestCase
+  def test_process_one_threshold
+    IO.popen(Helpers.exec_cmd('find_pvalue', 'test/data/KLF4_f2.pat 4.1719')){|f|
+      assert_equal "4.1719\t1048.0\t0.00099945068359375\n", f.read
+    }
+  end
+  def test_process_several_thresholds
+    IO.popen(Helpers.exec_cmd('find_pvalue','test/data/KLF4_f2.pat 4.1719 5.2403')){|f|
+      assert_equal "4.1719\t1048.0\t0.00099945068359375\n5.2403\t524.0\t0.000499725341796875\n", f.read
+    }
+  end
+  def test_process_several_thresholds_result_is_ordered
+    IO.popen(Helpers.exec_cmd('find_pvalue','test/data/KLF4_f2.pat 5.2403 4.1719')){|f|
+      assert_equal "5.2403\t524.0\t0.000499725341796875\n4.1719\t1048.0\t0.00099945068359375\n", f.read
+    }
+  end
+  def test_custom_discretization
+    IO.popen(Helpers.exec_cmd('find_pvalue','test/data/KLF4_f2.pat 5.2403 -d 100')){|f|
+      assert_equal "5.2403\t527.0\t0.0005025863647460938\n", f.read
+    }
+  end
+end

data/test/find_threshold_test.rb ADDED Viewed

@@ -0,0 +1,29 @@
+require 'test_helper'
+class FindThresholdTest < Test::Unit::TestCase
+  def test_process_several_pvalues
+    pvalues = []
+    IO.popen(Helpers.exec_cmd('find_threshold', 'test/data/KLF4_f2.pat -p 0.001 0.0005'), &:read).lines.each{|line|
+      pvalue, threshold, real_pvalue = line.strip.split("\t")
+      pvalues << pvalue
+      assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
+    }
+    assert_equal pvalues, ['0.0005', '0.001']
+  end
+  def test_process_one_pvalue
+    pvalue, threshold, real_pvalue = IO.popen(Helpers.exec_cmd('find_threshold', 'test/data/KLF4_f2.pat -p 0.001'), &:read).strip.split("\t")
+    assert_equal '0.001', pvalue
+    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
+  end
+  def test_process_default_pvalue
+    pvalue, threshold, real_pvalue = IO.popen(Helpers.exec_cmd('find_threshold','test/data/KLF4_f2.pat'), &:read).strip.split("\t")
+    assert_equal '0.0005', pvalue
+    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold}"), real_pvalue
+  end
+  def test_custom_discretization
+    pvalue, threshold, real_pvalue = IO.popen(Helpers.exec_cmd('find_threshold','test/data/KLF4_f2.pat -d 100'), &:read).strip.split("\t")
+    assert_equal '0.0005', pvalue
+    assert_equal Helpers.obtain_pvalue_by_threshold("test/data/KLF4_f2.pat #{threshold} -d 100"), real_pvalue
+  end
+end

data/test/preprocess_collection_test.rb ADDED Viewed

@@ -0,0 +1,9 @@
+require 'test_helper'
+class TestPreprocessCollection < Test::Unit::TestCase
+  def test_multipvalue_preproceessing
+    system(Helpers.exec_cmd('preprocess_collection','./test/data/test_collection -o test/data/test_collection.yaml.tmp -p 0.0005 0.0001 0.00005 --silent'))
+    assert_equal File.read('test/data/test_collection.yaml'), File.read('test/data/test_collection.yaml.tmp')
+    File.delete 'test/data/test_collection.yaml.tmp'
+  end
+end

data/test/scan_collection_test.rb ADDED Viewed

@@ -0,0 +1,17 @@
+require 'test_helper'
+class TestScanCollection < Test::Unit::TestCase
+  def test_scan_default_cutoff
+    assert_equal File.read('test/data/KLF4_f2_scan_results_default_cutoff.txt'),
+                 IO.popen(Helpers.exec_cmd('scan_collection','test/data/KLF4_f2.pat test/data/test_collection.yaml --silent'), &:read)
+  end
+  def test_scan_and_output_all_results
+    assert_equal File.read('test/data/KLF4_f2_scan_results_all.txt'),
+                 IO.popen(Helpers.exec_cmd('scan_collection','test/data/KLF4_f2.pat test/data/test_collection.yaml --all --silent'), &:read)
+  end
+  def test_scan_precise_mode
+    assert_equal File.read('test/data/KLF4_f2_scan_results_precise_mode.txt'),
+                 IO.popen(Helpers.exec_cmd('scan_collection','test/data/KLF4_f2.pat test/data/test_collection.yaml --precise --all --silent'), &:read)
+  end
+end

data/test/test_helper.rb ADDED Viewed

@@ -0,0 +1,10 @@
+require 'test/unit'
+module Helpers
+  def self.obtain_pvalue_by_threshold(args)
+    IO.popen("find_pvalue #{args}",&:read).strip.split.last
+  end
+  def self.exec_cmd(executable, param_list)
+    "ruby #{File.dirname(File.absolute_path __FILE__)}/../lib/macroape/exec/#{executable}.rb #{param_list}"
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: macroape
 version: !ruby/object:Gem::Version
-  version: 3.2.2
+  version: 3.3.0
   prerelease:
 platform: ruby
 authors:
@@ -9,8 +9,24 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-05-28 00:00:00.000000000 Z
-dependencies: []
+date: 2012-06-20 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: bioinform
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 0.1.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 0.1.0
 description: Macroape is an abbreviation for MAtrix CompaRisOn by Approximate P-value
   Estimation. It's a bioinformatic tool for evaluating similarity measure and best
   alignment between a pair of Position Weight Matrices(PWM), finding thresholds by
@@ -41,8 +57,6 @@ files:
 - bin/scan_collection
 - lib/macroape.rb
 - lib/macroape/aligned_pair_intersection.rb
-- lib/macroape/aligned_pair_metrics.rb
-- lib/macroape/aligned_pair_transformations.rb
 - lib/macroape/collection.rb
 - lib/macroape/count_by_threshold.rb
 - lib/macroape/exec/eval_alignment.rb
@@ -51,17 +65,8 @@ files:
 - lib/macroape/exec/find_threshold.rb
 - lib/macroape/exec/preprocess_collection.rb
 - lib/macroape/exec/scan_collection.rb
-- lib/macroape/extract_pwm.rb
-- lib/macroape/gauss_estimation.rb
-- lib/macroape/matrix_information.rb
-- lib/macroape/matrix_on_background.rb
-- lib/macroape/matrix_transformations.rb
-- lib/macroape/pair_metrics.rb
-- lib/macroape/pair_transformations.rb
 - lib/macroape/pwm_compare.rb
 - lib/macroape/pwm_compare_aligned.rb
-- lib/macroape/single_matrix.rb
-- lib/macroape/support.rb
 - lib/macroape/threshold_by_pvalue.rb
 - lib/macroape/version.rb
 - macroape.gemspec
@@ -76,7 +81,13 @@ files:
 - test/data/test_collection/GABPA_f1.pat
 - test/data/test_collection/KLF4_f2.pat
 - test/data/test_collection/SP1_f1.pat
-- test/macroape_test.rb
+- test/eval_alignment_similarity_test.rb
+- test/eval_similarity_test.rb
+- test/find_pvalue_test.rb
+- test/find_threshold_test.rb
+- test/preprocess_collection_test.rb
+- test/scan_collection_test.rb
+- test/test_helper.rb
 homepage: http://autosome.ru/macroape/
 licenses: []
 post_install_message:
@@ -113,4 +124,10 @@ test_files:
 - test/data/test_collection/GABPA_f1.pat
 - test/data/test_collection/KLF4_f2.pat
 - test/data/test_collection/SP1_f1.pat
-- test/macroape_test.rb
+- test/eval_alignment_similarity_test.rb
+- test/eval_similarity_test.rb
+- test/find_pvalue_test.rb
+- test/find_threshold_test.rb
+- test/preprocess_collection_test.rb
+- test/scan_collection_test.rb
+- test/test_helper.rb

data/lib/macroape/aligned_pair_metrics.rb DELETED Viewed

@@ -1,24 +0,0 @@
-module PWMCompare
-  module AlignedPairMetrics
-    def jaccard(first_threshold, second_threshold)
-      f = first.counts_by_thresholds(first_threshold).first
-      s = second.counts_by_thresholds(second_threshold).first
-      if f == 0 or s == 0
-        return {similarity: -1, tanimoto: -1, recognized_by_both: 0,
-              recognized_by_first: f,
-              recognized_by_second: s,
-            }
-      end
-      intersect = counts_for_two_matrices(first_threshold, second_threshold)
-      intersect = Math.sqrt(intersect[0] * intersect[1])
-      union = f + s - intersect
-      similarity = intersect.to_f / union
-      { similarity: similarity,
-        tanimoto: 1.0 - similarity,
-        recognized_by_both: intersect,
-        recognized_by_first: f,
-        recognized_by_second: s }
-    end
-  end
-end

data/lib/macroape/aligned_pair_transformations.rb DELETED Viewed

@@ -1,23 +0,0 @@
-module PWMCompare
-  module AlignedPairTransformations
-    #def discrete(rate)
-    #  PWMCompareAligned.new(first.discrete(rate), second.discrete(rate))
-    #end
-    def sort_pair_of_matrices_by(&block)
-      mat = first.pwm.zip(second.pwm).sort_by(&block).transpose
-      PWMCompareAligned.new(SinglePWM(mat[0],first.probabilities), SinglePWM(mat[1], second.probabilities))
-    end
-    def sort_decreasing_max
-      PWMCompareAligned.new(*sort_pair_of_matrices_by{|col_pair| -col_pair[0].max} )
-    end
-    def sort_increasing_min
-      PWMCompareAligned.new(*sort_pair_of_matrices_by{|col_pair| col_pair[0].min} )
-    end
-    def permute_columns(permutation_index)
-      PWMCompareAligned.new(first.permute(permutation_index), second.permute(permutation_index))
-    end
-  end
-end

data/lib/macroape/extract_pwm.rb DELETED Viewed

@@ -1,32 +0,0 @@
-# r_stream, w_stream - supposed to be a pipe. Data's read from r_stream, pwm's extracted, remaining data pushed back to w_stream
-#  ... --> w_stream --> r_stream --> data
-#              ^                       |
-#              |                       v
-#             ...  <--  w_stream  <-- ... --> extracted pwm
-def extract_pwm(r_stream, w_stream)
-  lines = r_stream.readlines
-  return [r_stream, w_stream, nil] if lines.empty?
-  extracted_pwm = [lines.shift]
-  while extracted_pwm.last.chomp == ''
-    extracted_pwm = [lines.shift.strip]
-    return [r_stream, w_stream, nil] unless extracted_pwm.last
-  end
-  r_stream.close
-  begin
-    until lines.empty?
-      line = lines.shift
-      line.split.each{|x| Float(x) } # raises error if string is not a numeric
-      raise 'Not a PWM string (too little number of numbers - may be empty string or name of next pwm). PWM finished' if line.split.size < 4
-      extracted_pwm << line
-    end
-  rescue
-    lines.unshift(line)
-  end
-  new_r_stream, new_w_stream = IO.pipe
-  lines.each{|one_line| new_w_stream.write(one_line)}
-  new_w_stream.close
-  [new_r_stream, new_w_stream, extracted_pwm]
-end

data/lib/macroape/gauss_estimation.rb DELETED Viewed

@@ -1,30 +0,0 @@
-module PWM
-  module GaussEstimation
-    def score_mean
-      bckgr = probabilities.map{|v| v.to_f / sum_of_probabilities}
-      matrix.inject(0.0){ |mean, col| mean + 4.times.inject(0.0){|sum,letter| sum + col[letter] * bckgr[letter]} }
-    end
-    def score_variance
-      bckgr = probabilities.map{|v| v.to_f / sum_of_probabilities}
-      matrix.inject(0.0) do |variance, col|
-        variance  + 4.times.inject(0.0) { |sum,letter| sum + col[letter]**2 * bckgr[letter] } -
-                    4.times.inject(0.0) { |sum,letter| sum + col[letter]    * bckgr[letter] }**2
-      end
-    end
-    def threshold_gauss_estimation(pvalue)
-      sigma = Math.sqrt(score_variance)
-      n_ = inverf2(1 - 2 * pvalue) * Math.sqrt(2)
-      score_mean + n_ * sigma
-    end
-    def inverf2(x)
-      sign = x < 0 ? -1 : 1
-      x = x.abs
-      a = 8 / (3*Math::PI) * (Math::PI-3) / (4-Math::PI)
-      part0 = ( 2/(Math::PI*a) + (Math.log(1-x*x)) / 2 )**2
-      part = -2 / (Math::PI * a) - Math.log(1-x*x)/2 + Math.sqrt(-1/a *
-      Math.log(1-x*x) + part0)
-      sign * Math.sqrt(part)
-    end
-  end
-end

data/lib/macroape/matrix_information.rb DELETED Viewed

@@ -1,29 +0,0 @@
-module PWM
-  module MatrixInformation
-    def length
-      @length ||= matrix.length
-    end
-    def best_score
-      @best_score ||= matrix.inject(0){|sum, col| sum + col.max}
-    end
-    def worst_score
-      @worst_score ||= matrix.inject(0){|sum, col| sum + col.min}
-    end
-    def best_suffix
-      return @best_suffix if @best_suffix
-      @best_suffix = Array.new(length + 1, 0) # best score of suffix s[i..l]
-      length.times{|i| @best_suffix[length - i - 1] = matrix[length - i - 1].max + @best_suffix[length - i] }
-      @best_suffix
-    end
-    def worst_suffix
-      return @worst_suffix if @worst_suffix
-      @worst_suffix = Array.new(length + 1, 0)
-      length.times{|i| @worst_suffix[length - i - 1] = matrix[length - i - 1].min + @worst_suffix[length - i] }
-      @worst_suffix
-    end
-    def refresh_infos
-      @length = @best_score = @worst_score = @best_suffix = @worst_suffix = nil
-      self
-    end
-  end
-end

data/lib/macroape/matrix_on_background.rb DELETED Viewed

@@ -1,16 +0,0 @@
-module PWM
-  class MatrixOnBackground < SingleMatrix
-    attr_reader :probabilities
-    def initialize(matrix,background)
-      super(matrix)
-      @probabilities = background
-    end
-    def sum_of_probabilities
-      @sum_of_probabilities ||= probabilities.inject(0.0, &:+)
-    end
-    def number_of_words
-      sum_of_probabilities ** length
-    end
-    include GaussEstimation, ThresholdByPvalue, CountByThreshold
-  end
-end

data/lib/macroape/matrix_transformations.rb DELETED Viewed

@@ -1,29 +0,0 @@
-module PWM
-  module MatrixTransformations
-    def reverse_complement
-      clone_and_transform( matrix.reverse.map(&:reverse) ).refresh_infos
-    end
-    def left_augment(n)
-      clone_and_transform( [[0.0]*4]* n + matrix ).refresh_infos
-    end
-    def right_augment(n)
-      clone_and_transform( matrix + [[0.0]*4]* n ).refresh_infos
-    end
-    def shift_to_zero # make worst score == 0 by shifting scores of each column
-      clone_and_transform( matrix.map{|col| col.map{|letter| letter - col.min}} ).refresh_infos
-    end
-    def discrete(rate)
-      clone_and_transform( matrix.map{|col| col.map{|letter| (letter * rate).ceil}} ).refresh_infos
-    end
-    def split(length_of_first_part)
-      [clone_and_transform( matrix.first(length_of_first_part)).refresh_infos, clone_and_transform(matrix.last(length - length_of_first_part)).refresh_infos]
-    end
-    def permute_columns(permutation_index)
-      clone_and_transform( permutation_index.map{|col| matrix[col]} ).refresh_infos
-    end
-    def clone_and_transform(new_matrix)
-      self.dup.instance_eval{ @matrix = new_matrix; self }
-    end
-  end
-end

data/lib/macroape/pair_metrics.rb DELETED Viewed

@@ -1,9 +0,0 @@
-module PWMCompare
-  module PairMetrics
-    def jaccard(threshold_first, threshold_second)
-      self.map_each_align do |align, alignment_info|
-        align.jaccard(threshold_first, threshold_second).merge(alignment_info)
-      end.max_by {|alignment_info| alignment_info[:similarity]}
-    end
-  end
-end

data/lib/macroape/pair_transformations.rb DELETED Viewed

@@ -1,28 +0,0 @@
-module PWMCompare
-  module PairTransformations
-    def each
-      second_rc = second.reverse_complement
-      (-second.length..first.length).to_a.product([:direct,:revcomp]) do |shift, orientation|
-        first_pwm_alignment = '.' * [-shift, 0].max + '>' * first.length
-        second_pwm_alignment = '.' * [shift, 0].max + (orientation == :direct ? '>' : '<') * second.length
-        overlap = [first.length + [-shift,0].max, second.length + [shift,0].max].min - shift.abs
-        alignment_length = [first_pwm_alignment.length, second_pwm_alignment.length].max
-        (first_pwm_alignment.length...alignment_length).each{|i| first_pwm_alignment[i] = '.'}
-        (second_pwm_alignment.length...alignment_length).each{|i| second_pwm_alignment[i] = '.'}
-        yield(PWMCompareAligned.new(first.left_augment([-shift,0].max),
-                                    (orientation == :direct ? second : second_rc).left_augment([shift,0].max)),
-              text: "#{first_pwm_alignment}\n#{second_pwm_alignment}",
-              shift: shift,
-              orientation: orientation,
-              overlap: overlap,
-              alignment_length: alignment_length
-              )
-      end
-    end
-    include Enumerable
-    alias :each_align :each
-    alias :map_each_align :map
-  end
-end