RubyGems - rbbt-dm - Versions diffs - 0.0.4 → 1.0.0 - Mend

rbbt-dm 0.0.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

data/lib/rbbt/network/paths.rb +47 -29
data/lib/rbbt/plots/bar.rb +152 -0
data/lib/rbbt/plots/heatmap.rb +63 -0
data/lib/rbbt/statistics/fdr.rb +59 -29
data/lib/rbbt/statistics/hypergeometric.rb +176 -72
data/lib/rbbt/statistics/random_walk.rb +285 -42
data/test/rbbt/network/test_paths.rb +3 -3
data/test/rbbt/statistics/test_hypergeometric.rb +24 -2
data/test/rbbt/statistics/test_random_walk.rb +39 -0
data/test/test_helper.rb +1 -1
metadata +95 -70

data/lib/rbbt/statistics/random_walk.rb CHANGED

@@ -1,69 +1,171 @@
 require 'png'
 require 'inline'
 require 'set'
+require 'rbbt/util/misc'
 module RandomWalk
-  class << self
-      inline do |builder|
+  inline do |builder|
+    builder.prefix  <<-EOC_CODE
+#include <math.h>
+#include <time.h>
+//{{{ Make compatible with 1.9 and 1.8
+#ifndef RUBY_19
+#ifndef RFLOAT_VALUE
+#define RFLOAT_VALUE(v) (RFLOAT(v)->value)
+#endif
+#ifndef RARRAY_PTR
+#define RARRAY_PTR(v) (RARRAY(v)->ptr)
+#endif
+#ifndef RARRAY_LEN
+#define RARRAY_LEN(v) (RARRAY(v)->len)
+#endif
+#endif
+//}}} Make compatible with 1.9 and 1.8
+    EOC_CODE
+    builder.c_singleton <<-'EOC'
+    void sample_without_replacement ( int populationSize,    int sampleSize,       VALUE positions) {
+        // Use Knuth's variable names
+        int n = sampleSize;
+        int N = populationSize;
+        int t = 0; // total input records dealt with
+        int m = 0; // number of items selected so far
+        double u;
+        //srand ( (unsigned)time ( NULL ) );
+        while (m < n)
+        {
+            u = (double) rand() / ((double) RAND_MAX + 1.0);
+            if ( (N - t)*u >= n - m )
+            {
+                t++;
+            }
+            else
+            {
+                rb_ary_push(positions, rb_int_new(t));
+                t++; m++;
+            }
+        }
+    }
+    EOC
-        builder.c_raw <<-'EOC'
-    double weight(int position, int mean){
-        double rel_pos = (double) abs(position - mean) / mean;
+    builder.c_raw_singleton <<-'EOC'
+    double fitted_weight(int position, int medium){
+        double rel_pos = (double) abs(position - medium) / medium;
         double weight =  0.3 *  0.5 * rel_pos +  0.7 * (exp(30*rel_pos)/exp(30));
         return(weight);
     }
-        EOC
+    EOC
-        builder.c <<-'EOC'
-    double fast_score_scale(VALUE positions, int total, int missing){
+    builder.c_singleton <<-'EOC'
+    double score_fitted_weight(VALUE positions, int total, int missing){
       int idx;
-      int mean = total / 2;
-      VALUE rel_q = rb_ary_new();
+      int medium = total / 2;
+      int position;
+      double penalty;
+      double max_top, max_bottom;
+      double hit_weights = 0;
       VALUE rel_l = rb_ary_new();
+      VALUE rel_q = rb_ary_new();
       rb_ary_push(rel_q,rb_float_new(0));
       // Rescale positions and accumulate weights
-      double total_weights = 0;
-      for (idx = 0; idx < RARRAY(positions)->len; idx++){
-        int position = FIX2INT(rb_ary_entry(positions, idx));
+      for (idx = 0; idx < RARRAY_LEN(positions); idx++){
+        position = FIX2INT(rb_ary_entry(positions, idx));
         rb_ary_push(rel_l, rb_float_new((double) position / total));
-        total_weights += weight(position, mean);
-        rb_ary_push(rel_q, rb_float_new(total_weights));
+        hit_weights += fitted_weight(position, medium);
+        rb_ary_push(rel_q, rb_float_new(hit_weights));
       }
       // Add penalty for missing genes
-      double penalty = missing * weight(mean * 0.8, mean);
-      total_weights  = total_weights + penalty;
-      // Traverse list and get extreme values
+      penalty = missing * fitted_weight(medium * 0.8, medium);
+      hit_weights  = hit_weights + penalty;
+      // Traverse list and get extreme values of:
+      // Proportion of weight covered - Proportion of hits covered
+      max_top = max_bottom = 0;
+      for (idx = 0; idx < RARRAY_LEN(positions); idx++){
+        double top    = RFLOAT_VALUE(rb_ary_entry(rel_q, idx + 1)) / hit_weights -
+                        RFLOAT_VALUE(rb_ary_entry(rel_l, idx));
+        double bottom = - (penalty + RFLOAT_VALUE(rb_ary_entry(rel_q, idx))) / hit_weights +
+                        RFLOAT_VALUE(rb_ary_entry(rel_l, idx));
+        if (top > max_top)       max_top    = top;
+        if (bottom > max_bottom) max_bottom = bottom;
+      }
+     if (max_top > max_bottom) return max_top;
+     else                      return -max_bottom;
+    }
+    EOC
+    builder.c_singleton <<-'EOC'
+    double score_custom_weights(VALUE positions, VALUE weights, int total_weights, int total, int missing){
+      int idx;
+      int medium = total / 2;
+      int position;
+      double penalty;
       double max_top, max_bottom;
+      double hit_weights = 0;
+      VALUE rel_l = rb_ary_new();
+      VALUE rel_q = rb_ary_new();
+      rb_ary_push(rel_q,rb_float_new(0));
+      // Rescale positions and accumulate weights
+      for (idx = 0; idx < RARRAY_LEN(positions); idx++){
+        position = FIX2INT(rb_ary_entry(positions, idx));
+        rb_ary_push(rel_l, rb_float_new((double) position / total));
+        hit_weights += rb_ary_entry(weights, position);
+        rb_ary_push(rel_q, rb_float_new(hit_weights / total_weights));
+      }
+      // Add penalty for missing genes
+      penalty = missing * rb_ary_entry(weights, (int) medium * 0.8);
+      hit_weights  = hit_weights + penalty;
+      hit_weights = hit_weights / total_weights;
+      // Traverse list and get extreme values of:
+      // Proportion of weight covered - Proportion of hits covered
       max_top = max_bottom = 0;
-      for (idx = 0; idx < RARRAY(positions)->len; idx++){
-        double top    = RFLOAT(rb_ary_entry(rel_q, idx + 1))->value / total_weights -
-                        RFLOAT(rb_ary_entry(rel_l, idx))->value;
-        double bottom = - (penalty + RFLOAT(rb_ary_entry(rel_q, idx))->value) / total_weights +
-                        RFLOAT(rb_ary_entry(rel_l, idx))->value;
+      for (idx = 0; idx < RARRAY_LEN(positions); idx++){
+        double top    = RFLOAT_VALUE(rb_ary_entry(rel_q, idx + 1)) / hit_weights -
+                        RFLOAT_VALUE(rb_ary_entry(rel_l, idx));
+        double bottom = - (penalty + RFLOAT_VALUE(rb_ary_entry(rel_q, idx))) / hit_weights +
+                        RFLOAT_VALUE(rb_ary_entry(rel_l, idx));
         if (top > max_top)       max_top    = top;
         if (bottom > max_bottom) max_bottom = bottom;
       }
      if (max_top > max_bottom) return max_top;
      else                      return -max_bottom;
     }
-        EOC
+    EOC
-      end
   end
   class << self
-    alias score fast_score_scale
+    alias score score_fitted_weight
+    alias score_weights score_custom_weights
   end
   def self.combine(up, down)
@@ -90,10 +192,9 @@ module RandomWalk
     if size == 0
       [0] * times
     else
-      a = (0..total - 1).to_a
       (1..times).collect do
-        a.shuffle!
-        score(a[1..size].sort, total, missing).abs
+        p = Misc.random_sample_in_range(total, size)
+        score(p.sort, total, missing).abs
       end
     end
   end
@@ -121,6 +222,7 @@ module RandomWalk
   }
   def self.draw_hits(hits, total, filename = nil, options = {})
+    update = options[:update]
     size = options[:size] || total
     bg_color = options[:bg_color] || :white
@@ -128,14 +230,13 @@ module RandomWalk
     sections = options[:sections] || []
     size = [size, total].min
+    canvas = PNG::Canvas.new size, width, COLORS[bg_color] || PNG::Color.from(bg_color)
     hits = hits.collect{|h| h - 1}
     if size < total
       hits = hits.collect{|h| (h.to_f * size / total).to_i}
     end
-    canvas = PNG::Canvas.new size, width, COLORS[bg_color] || PNG::Color.from(bg_color)
     sections.each{|color, info|
       start = info[0]
       finish = info[1]
@@ -161,11 +262,24 @@ module RandomWalk
 end
 module OrderedList
+  attr_accessor :weights, :total_weights
+  def self.setup(list, weights = nil, total_weights = nil)
+    list.extend OrderedList
+    list.weights = weights
+    if weights and total_weights.nil?
+      list.total_weights = Misc.sum(weights)
+    else
+      list.total_weights = total_weights
+    end
+    list
+  end
   def self.hits(list, set)
     set = Set.new(set) unless Set === set
     hits = []
     list.each_with_index do |e,i|
-      hits << i if set.include? e
+      hits << i + 1 if set.include? e # count from 1
     end
     hits
   end
@@ -179,15 +293,144 @@ module OrderedList
     OrderedList.hits(self, set)
   end
+  def score(set)
+    hits = hits(set)
+    RandomWalk.score(hits.sort, self.length, 0)
+  end
+  def score_weights(set)
+    raise "No weight defined" if @weights.nil?
+    @total_weights ||= Misc.sum(@weights)
+    hits = hits(set)
+    RandomWalk.score_weights(hits.sort, @weights, @total_weights, self.length, 0)
+  end
   def draw_hits(set, filename = nil, options = {})
     OrderedList.draw_hits(self, set, filename, options)
   end
-  def pvalue(set, options = {})
-    options = Misc.add_defaults options, :permutations => 1000, :missing => 0
-    hits = hits(set.compact)
-    score = RandomWalk.score(hits.sort, self.length, 0)
-    permutations = RandomWalk.permutations(set.length, self.length, options[:missing], options[:permutations])
-    RandomWalk.pvalue(permutations, score)
+  #def pvalue(set, options = {})
+  #  set = Set.new(set.compact) unless Set === set
+  #  options = Misc.add_defaults options, :permutations => 10000, :missing => 0
+  #  hits = hits(set)
+  #  score = RandomWalk.score(hits.sort, self.length, 0)
+  #  permutations = RandomWalk.permutations(set.length, self.length, options[:missing], options[:permutations])
+  #  RandomWalk.pvalue(permutations, score)
+  #end
+  def pvalue(set, cutoff = 0.1, options = {})
+    set = Set.new(set.compact) unless Set === set
+    options = Misc.add_defaults options, :permutations => 10000, :missing => 0
+    permutations, missing = Misc.process_options options, :permutations, :missing
+    hits = hits(set)
+    return 1.0 if hits.empty?
+    target_score = RandomWalk.score(hits.sort, self.length, 0)
+    target_score_abs = target_score.abs
+    max = (permutations.to_f * cutoff).ceil
+    size = set.length
+    total = self.length
+    better_permutation_score_count = 1
+    if size == 0
+      1.0
+    else
+      (1..permutations).each do
+        p= []
+        RandomWalk.sample_without_replacement(total, size, p)
+        permutation_score = RandomWalk.score(p.sort, total, missing).abs
+        if permutation_score.abs > target_score_abs
+          better_permutation_score_count += 1
+        end
+        return 1.0 if better_permutation_score_count > max
+      end
+      p = better_permutation_score_count.to_f / permutations
+      p = -p if target_score < 0
+      p
+    end
+  end
+  def pvalue_weights(set, cutoff = 0.1, options = {})
+    raise "No weight defined" if @weights.nil?
+    @total_weights ||= Misc.sum(@weights)
+    set = Set.new(set.compact) unless Set === set
+    options = Misc.add_defaults options, :permutations => 10000, :missing => 0
+    permutations, missing = Misc.process_options options, :permutations, :missing
+    hits = hits(set)
+    return 1.0 if hits.empty?
+    target_score = RandomWalk.score_weights(hits.sort, @weights, @total_weights, self.length, 0)
+    target_score_abs = target_score.abs
+    max = (permutations.to_f * cutoff).ceil
+    size = set.length
+    total = self.length
+    better_permutation_score_count = 1
+    if size == 0
+      1.0
+    else
+      (1..permutations).each do
+        p= []
+        RandomWalk.sample_without_replacement(total, size, p)
+        permutation_score = RandomWalk.score_weights(p.sort, @weights, @total_weights, total, missing).abs
+        if permutation_score.abs > target_score_abs
+          better_permutation_score_count += 1
+        end
+        return 1.0 if better_permutation_score_count > max
+      end
+      p = better_permutation_score_count.to_f / permutations
+      p = -p if target_score < 0
+      p
+    end
+  end
+end
+module TSV
+  def self.rank_enrichment_for_list(list, hits, options = {})
+    cutoff = Misc.process_options options, :cutoff
+    list.extend OrderedList
+    if cutoff
+      list.pvalue(hits, cutoff, options)
+    else
+      list.pvalue(hits, options)
+    end
+  end
+  def self.rank_enrichment(tsv, list, options = {})
+    if tsv.fields
+      res = TSV.setup({}, :cast => :to_f, :type => :double, :key_field => tsv.key_field, :fields => ["p-value", tsv.fields.first])
+    else
+      res = TSV.setup({}, :cast => :to_f, :type => :double)
+    end
+    tsv.with_monitor do
+      tsv.with_unnamed do
+        tsv.through do |key, values|
+          pvalue = rank_enrichment_for_list(list, values, options)
+          res[key] = [pvalue, (values.respond_to?(:subset) ? values.subset(list) :  values - list)]
+        end
+      end
+    end
+    FDR.adjust_hash! res, 0 if options[:fdr]
+    res
+  end
+  def rank_enrichment(list, options = {})
+    TSV.rank_enrichment(self, list, options)
   end
 end

data/test/rbbt/network/test_paths.rb CHANGED

@@ -6,7 +6,7 @@ require 'set'
 class TestNetwork < Test::Unit::TestCase
-  def test_dijsktra
+  def _test_dijsktra
     string = STRING.protein_protein.tsv :persist => false, :fields => ["Interactor Ensembl Protein ID"], :type => :flat
     string.unnamed = true
@@ -22,11 +22,11 @@ class TestNetwork < Test::Unit::TestCase
   def test_weighted_dijsktra
     string = STRING.protein_protein.tsv
+    string.unnamed = true
     string.process "Score" do |scores|
       scores.collect{|score| 1000 - score.to_i}
     end
-    string.unnamed = true
     start_node = "ENSP00000256078"
     end_node = "ENSP00000306245"
@@ -45,7 +45,7 @@ class TestNetwork < Test::Unit::TestCase
   end
-  def test_random_weighted_dijsktra
+  def _test_random_weighted_dijsktra
     string = STRING.protein_protein.tsv
     string.process "Score" do |scores|

data/test/rbbt/statistics/test_hypergeometric.rb CHANGED

@@ -5,7 +5,7 @@ require 'test/unit'
 class TestHypergeometric < Test::Unit::TestCase
   def test_hypergeometric
-    assert Hypergeometric.hypergeometric(100, 20, 15,13) < 0.05
+    assert Hypergeometric.hypergeometric(100, 20, 15, 13) < 0.0005
   end
   def test_annotation_counts
@@ -38,7 +38,29 @@ row7    A    B    Id3
     TmpFile.with_file(content) do |filename|
       tsv = TSV.open(filename, :sep => /\s+/)
-      assert_equal %w(a), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA", :fdr => false).collect{|annot,pvalue| pvalue < 0.05 ? annot : nil}.compact
+      assert_equal %w(a), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA", :fdr => false).collect{|annot, values| pvalue = values.first.first.to_f; pvalue < 0.05 ? annot : nil}.compact
     end
   end
+  def test_enrichement_with_background
+     content =<<-EOF
+#Id    ValueA    ValueB    OtherID
+row1    a|aa|aaa    b    Id1|Id2
+row2    A    B    Id3
+row3    a    C    Id4
+row4    a    B    Id3
+row5    a    B    Id3
+row6    A    B    Id3
+row7    A    B    Id3
+    EOF
+    TmpFile.with_file(content) do |filename|
+      tsv = TSV.open(filename, :sep => /\s+/)
+      assert_equal %w(a), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA", :fdr => false, :background => %w(row1 row2 row3 row4 row5 row6 row7)).collect{|annot, values| pvalue = values.first.first.to_f; pvalue < 0.05 ? annot : nil}.compact
+      ddd tsv.enrichment(%w(row3 row4 row5), "ValueA", :fdr => false, :background => %w(row1 row2 row3 row4 row5))
+      assert_equal %w(), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA", :fdr => false, :background => %w(row1 row2 row3 row4 row5)).collect{|annot, values| pvalue = values.first.first.to_f; pvalue < 0.05 ? annot : nil}.compact
+    end
+  end
 end

data/test/rbbt/statistics/test_random_walk.rb ADDED

@@ -0,0 +1,39 @@
+require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
+require 'rbbt/statistics/random_walk'
+require 'test/unit'
+class TestRandomWalk < Test::Unit::TestCase
+  def test_score_weight
+    list = (1..1000).to_a
+    list.extend OrderedList
+    weights = list.collect{|v| (Misc.mean(list) - v)**2}
+    weights_total = Misc.sum(weights)
+    assert RandomWalk.score_custom_weights((1..100).to_a, weights, weights_total, list.length, 0) >
+    RandomWalk.score_custom_weights([100, 200, 300, 400, 500], weights, weights_total, list.length, 0)
+  end
+  def test_pvalue
+    list = (1..1000).to_a
+    list.extend OrderedList
+    assert list.pvalue((1..100).to_a, 0.05) < 0.05
+    assert list.pvalue([100, 200, 300, 400, 500], 0.05) > 0.05
+  end
+  def test_pvalue_weights
+    list = (1..1000).to_a
+    weights = list.collect{|v| (Misc.mean(list) - v)**2}
+    weights_total = Misc.sum(weights)
+    OrderedList.setup(list, weights, weights_total)
+    assert list.pvalue_weights((1..100).to_a, 0.05) < 0.05
+    assert list.pvalue_weights([100, 200, 300, 400, 500], 0.05) > 0.05
+  end
+end

data/test/test_helper.rb CHANGED

@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
 $LOAD_PATH.unshift(File.dirname(__FILE__))
 class Test::Unit::TestCase
-  def test_datafile(file)
+  def get_test_datafile(file)
     File.join(File.dirname(__FILE__), 'data', file)
   end
 end