rbbt-dm 0.0.4 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/network/paths.rb +47 -29
- data/lib/rbbt/plots/bar.rb +152 -0
- data/lib/rbbt/plots/heatmap.rb +63 -0
- data/lib/rbbt/statistics/fdr.rb +59 -29
- data/lib/rbbt/statistics/hypergeometric.rb +176 -72
- data/lib/rbbt/statistics/random_walk.rb +285 -42
- data/test/rbbt/network/test_paths.rb +3 -3
- data/test/rbbt/statistics/test_hypergeometric.rb +24 -2
- data/test/rbbt/statistics/test_random_walk.rb +39 -0
- data/test/test_helper.rb +1 -1
- metadata +95 -70
data/lib/rbbt/network/paths.rb
CHANGED
@@ -1,71 +1,87 @@
|
|
1
1
|
require 'priority_queue'
|
2
|
+
|
2
3
|
module Paths
|
3
4
|
|
4
|
-
def self.dijkstra(adjacency, start_node, end_node = nil)
|
5
|
+
# Breadth-style shortest path search (every edge costs 1) from +start_node+
# towards +end_node+.
#
# adjacency::  responds to +include?+ and +[]+; +adjacency[u]+ is iterated
#              with +each+ to obtain the neighbours of +u+.
# start_node:: node where the search begins.
# end_node::   a String naming one target, a collection of candidate targets
#              (must respond to +include?+ and +find+), or +nil+ to explore
#              everything reachable.
# max_steps::  when given, a node is not expanded if its path back to
#              +start_node+ holds more than this many nodes.
#
# Returns +nil+ when +start_node+ is not in +adjacency+ or no target was
# reached. With a target, returns the result of +extract_path+ for that target
# (an Array starting at the target and ending at +start_node+). Without a
# target, returns the +parents+ Hash (node => predecessor).
def self.dijkstra(adjacency, start_node, end_node = nil, max_steps = nil)
  return nil unless adjacency.include? start_node

  # NOTE(review): the PriorityQueue calls below are kept exactly as they were
  # (+queue[node] << distance+, +priorities.first+, +shift+); confirm they match
  # the queue implementation that is actually loaded.
  active    = PriorityQueue.new
  distances = Hash.new { 1.0 / 0.0 }
  parents   = Hash.new

  active[start_node] << 0
  best = 1.0 / 0.0
  until active.empty?
    u = active.priorities.first
    distance = active.shift
    distances[u] = distance
    d = distance + 1

    # The path is only needed to enforce the step limit.
    if max_steps
      path = extract_path(parents, start_node, u)
      next if path.length > max_steps
    end

    adjacency[u].each do |v|
      next unless d < distances[v] and d < best # we can't relax this one

      # Guarding on end_node lets the "explore everything" mode (nil target)
      # run instead of failing on nil.include?.
      best = d if end_node && (String === end_node ? end_node == v : end_node.include?(v))
      active[v] << d if adjacency.include? v
      distances[v] = d
      parents[v] = u
    end
  end

  if end_node
    # For a collection of targets, settle on the first one that was reached.
    end_node = end_node.find { |n| parents.key?(n) } unless String === end_node
    return nil if not parents.include? end_node

    # Build the path for the target itself, not for the last node popped
    # from the queue.
    extract_path(parents, start_node, end_node)
  else
    parents
  end
end
|
38
39
|
|
39
|
-
def self.
|
40
|
+
# Walks the +parents+ links from +end_node+ back to +start_node+.
#
# parents::    Hash mapping each node to its predecessor.
# start_node:: node at which the walk stops (compared with +===+).
# end_node::   node at which the walk begins.
#
# Returns an Array that starts with +end_node+ and ends with +start_node+,
# or +nil+ when the chain of predecessors runs out before +start_node+ is
# reached.
def self.extract_path(parents, start_node, end_node)
  path = [end_node]
  until path.last === start_node
    # Without this check a broken chain would append nil forever.
    return nil unless parents.key?(path.last)

    path << parents[path.last]
  end
  path
end
|
47
|
+
|
48
|
+
def self.weighted_dijkstra(adjacency, start_node, end_node = nil, threshold = nil, max_steps = nil)
|
40
49
|
return nil unless adjacency.include? start_node
|
41
50
|
|
42
51
|
active = PriorityQueue.new
|
43
52
|
distances = Hash.new { 1.0 / 0.0 }
|
44
53
|
parents = Hash.new
|
45
54
|
|
46
|
-
active[start_node]
|
55
|
+
active[start_node] << 0
|
47
56
|
best = 1.0 / 0.0
|
57
|
+
found = false
|
48
58
|
until active.empty?
|
49
|
-
u
|
59
|
+
u = active.priorities.first
|
60
|
+
distance = active.shift
|
50
61
|
distances[u] = distance
|
51
|
-
|
62
|
+
path = extract_path(parents, start_node, u)
|
63
|
+
next if path.length > max_steps if max_steps
|
64
|
+
next if not adjacency.include?(u) or adjacency[u].nil? or adjacency[u].empty?
|
52
65
|
Misc.zip_fields(adjacency[u]).each do |v,node_dist|
|
66
|
+
next if threshold and node_dist > threshold
|
53
67
|
d = distance + node_dist
|
54
68
|
next unless d < distances[v] and d < best # we can't relax this one
|
55
|
-
active[v]
|
69
|
+
active[v] << d
|
70
|
+
distances[v] = d
|
56
71
|
parents[v] = u
|
57
|
-
|
72
|
+
if (String === end_node ? end_node == v : end_node.include?(v))
|
73
|
+
best = d
|
74
|
+
found = true
|
75
|
+
end
|
58
76
|
end
|
59
77
|
end
|
60
78
|
|
79
|
+
return nil unless found
|
80
|
+
|
61
81
|
if end_node
|
62
82
|
end_node = (end_node & parents.keys).first unless String === end_node
|
63
83
|
return nil if not parents.include? end_node
|
64
|
-
|
65
|
-
while not path.last === start_node
|
66
|
-
path << parents[path.last]
|
67
|
-
end
|
68
|
-
path
|
84
|
+
extract_path(parents, start_node, end_node)
|
69
85
|
else
|
70
86
|
parents
|
71
87
|
end
|
@@ -78,16 +94,17 @@ module Paths
|
|
78
94
|
distances = Hash.new { 1.0 / 0.0 }
|
79
95
|
parents = Hash.new
|
80
96
|
|
81
|
-
active[start_node]
|
97
|
+
active[start_node] << 0
|
82
98
|
best = 1.0 / 0.0
|
83
99
|
until active.empty?
|
84
|
-
u
|
100
|
+
u = active.priorities.first
|
101
|
+
distance = active.shift
|
85
102
|
distances[u] = distance
|
86
103
|
next if not adjacency.include?(u) or adjacency[u].nil? or adjacency[u].empty?
|
87
104
|
Misc.zip_fields(adjacency[u]).each do |v,node_dist|
|
88
105
|
d = distance + (node_dist * (l + rand))
|
89
106
|
next unless d < distances[v] and d < best # we can't relax this one
|
90
|
-
active[v]
|
107
|
+
active[v] << distances[v] = d
|
91
108
|
parents[v] = u
|
92
109
|
best = d if (String === end_node ? end_node == v : end_node.include?(v))
|
93
110
|
end
|
@@ -109,14 +126,15 @@ end
|
|
109
126
|
|
110
127
|
module Entity
|
111
128
|
module Adjacent
|
112
|
-
def path_to(adjacency, entities)
|
129
|
+
# Path from this entity to +entities+ over +adjacency+.
#
# When the receiver is an Array the call is mapped over its elements and an
# Array of results is returned. Otherwise the search is delegated to
# Paths.dijkstra for +:flat+ adjacency data (where +threshold+ doubles as the
# step limit if +max_steps+ is not given) and to Paths.weighted_dijkstra for
# any other adjacency type.
def path_to(adjacency, entities, threshold = nil, max_steps = nil)
  return collect { |gene| gene.path_to(adjacency, entities, threshold, max_steps) } if Array === self

  flat = adjacency.type == :flat
  return Paths.weighted_dijkstra(adjacency, self, entities, threshold, max_steps) unless flat

  Paths.dijkstra(adjacency, self, entities, max_steps || threshold)
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'png'
|
2
|
+
require 'rbbt/util/misc'
|
3
|
+
|
4
|
+
# Draws "bar" images: for every hit (or every position inside a range) out of
# +total+ positions, a vertical line spanning the full height is painted on a
# PNG canvas.
module BarPlot

  # Color names accepted wherever a color is expected.
  COLORS = {
    :red    => PNG::Color::Red,
    :green  => PNG::Color::Green,
    :blue   => PNG::Color::Blue,
    :white  => PNG::Color::White,
    :black  => PNG::Color::Black,
    :gray   => PNG::Color::Gray,
    :yellow => PNG::Color::Yellow,
  }

  # Upper bound for the default canvas width.
  MAX_WIDTH = 2000

  # Turns +color+ into a PNG::Color: instances pass through, keys of COLORS
  # are looked up, anything else is handed to PNG::Color.from.
  def self.get_color(color)
    return color if PNG::Color === color
    return COLORS[color] if COLORS.include? color
    PNG::Color.from(color)
  end

  # Maps each value to a color interpolated between +:start_color+ (smallest
  # finite value) and +:end_color+ (largest finite value).
  #
  # values::  Array of numbers responding to +nan?+ and +infinite?+.
  # options:: +:start_color+ (default +:green+), +:end_color+ (default +:red+),
  #           +:missing_color+ (default +:blue+, used for NaN/infinite values).
  #
  # Returns an Array of PNG::Color, one per value. If no value is finite every
  # entry is the missing color; if all finite values are equal every entry is
  # the end color.
  def self.gradient_color(values, options = {})
    options = Misc.add_defaults options, :start_color => :green, :end_color => :red, :missing_color => :blue
    start_color, end_color, missing_color = Misc.process_options options, :start_color, :end_color, :missing_color

    start_color   = get_color start_color
    end_color     = get_color end_color
    # Converted like the other two so the result holds a single type.
    missing_color = get_color missing_color

    finite = values.reject { |v| v.nan? || v.infinite? }
    max = finite.max
    min = finite.min

    return [missing_color] * values.length if max.nil? || min.nil?
    return [end_color] * values.length if max == min

    diff = (max - min).to_f

    values.collect do |v|
      if v.infinite? || v.nan?
        missing_color
      else
        # Blend weight in 0..1; it was previously scaled to 0..255, which
        # pushed the channels far outside the valid range.
        ratio = (v - min) / diff
        r = (start_color.r.to_f * (1 - ratio) + (end_color.r.to_f * ratio)).to_i
        g = (start_color.g.to_f * (1 - ratio) + (end_color.g.to_f * ratio)).to_i
        b = (start_color.b.to_f * (1 - ratio) + (end_color.b.to_f * ratio)).to_i
        PNG::Color.new(r, g, b)
      end
    end
  end

  # Paints one vertical line per hit.
  #
  # hits::   1-based positions in 1..total.
  # total::  number of positions represented by the full canvas width.
  # canvas:: object responding to +width+, +height+ and +line+.
  # color::  a single color, or an Array with one color per hit.
  #
  # Returns +canvas+.
  def self.draw_hits_on_canvas(hits, total, canvas, color = PNG::Color::Black)
    width  = canvas.width
    height = canvas.height

    hits = hits.collect { |h| h - 1 } # make it start at 0

    # Squeeze positions into the available pixels when there are fewer
    # pixels than positions.
    hits = hits.collect { |h| (h.to_f * width / total).floor } if width < total

    if Array === color
      hits.zip(color).each do |hit, hit_color|
        canvas.line hit, 0, hit, height - 1, get_color(hit_color)
      end
    else
      color = get_color color
      hits.each do |hit|
        canvas.line hit, 0, hit, height - 1, color
      end
    end

    canvas
  end

  # Paints every position covered by each range.
  #
  # ranges:: Ranges of 1-based positions; both ends are treated as inclusive.
  # total::  number of positions represented by the full canvas width.
  # canvas:: object responding to +width+, +height+ and +line+.
  # color::  a single color, or an Array with one color per range.
  #
  # Returns +canvas+.
  def self.draw_sections_on_canvas(ranges, total, canvas, color = PNG::Color::Black)
    width  = canvas.width
    height = canvas.height

    # Shift BOTH ends to 0-based. Shifting only the start made every section
    # one column too long and let a section ending at +total+ address
    # x == width.
    ranges = ranges.collect { |r| ((r.begin - 1)..(r.end - 1)) }

    if width < total
      ratio = width.to_f / total
      ranges = ranges.collect { |range| ((range.begin.to_f * ratio).floor..(range.end.to_f * ratio).floor) }
    end

    if Array === color
      ranges.zip(color).each do |range, range_color|
        range.each do |hit|
          canvas.line hit, 0, hit, height - 1, get_color(range_color)
        end
      end
    else
      color = get_color color
      ranges.each do |range|
        range.each do |hit|
          canvas.line hit, 0, hit, height - 1, color
        end
      end
    end

    canvas
  end

  # Returns the canvas to draw on.
  #
  # options:: +:canvas+ (used as is when given), +:width+ (defaults to the
  #           smaller of +:total+ and MAX_WIDTH), +:height+ (default 20),
  #           +:background+ (default white), and +:update+ with +:filename+ to
  #           load an existing image from disk instead of starting blank.
  def self.get_canvas(options = {})
    # compact keeps the default computable when :total is absent (for
    # instance when the caller passes :width or :canvas directly).
    default_width = [options[:total], MAX_WIDTH].compact.min
    options = Misc.add_defaults options, :width => default_width, :height => 20, :background => PNG::Color::White
    width, height, background, canvas = Misc.process_options options, :width, :height, :background, :canvas

    return canvas if canvas

    # File.exist? replaces File.exists?, which newer Rubies no longer provide.
    if options[:update] && options[:filename] && File.exist?(options[:filename])
      PNG.load_file options[:filename]
    else
      PNG::Canvas.new width, height, get_color(background)
    end
  end

  # Draws +items+ and delivers the image according to +options[:filename]+.
  #
  # items::   Array of 1-based hit positions, a Range, or an Array of Ranges.
  # total::   number of positions the full width stands for.
  # options:: +:color+ plus everything accepted by get_canvas; +:filename+
  #           selects the output.
  #
  # Returns the PNG blob when +:filename+ is +:string+, the canvas when it is
  # +nil+, and otherwise the result of saving the PNG to that path.
  def self.draw(items, total, options = {})
    options = options.merge :total => total
    canvas = get_canvas(options)
    items = [items] if Range === items
    return canvas if items.empty? && options[:filename].nil?

    color = options.delete(:color) || PNG::Color::Black
    if Range === items.first
      draw_sections_on_canvas(items, total, canvas, color)
    else
      draw_hits_on_canvas(items, total, canvas, color)
    end

    case options[:filename]
    when :string
      PNG.new(canvas).to_blob
    when nil
      canvas
    else
      PNG.new(canvas).save options[:filename]
    end
  end
end
|
152
|
+
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'rbbt/util/R'
|
2
|
+
|
3
|
+
# Renders heatmaps by running an R script against a table-like object.
module Heatmap

  # Writes a clustered heatmap of +values+ to +filename+ as a PNG.
  #
  # values::   must respond to +fields+, +length+ and +R+ (the latter receives
  #            the script; the script refers to the table as +data+).
  # filename:: path of the image, interpolated into the R script as a
  #            single-quoted string.
  # options::  +:scale+ ("row"/true, "column", "none"/nil), +:take_log+ (apply
  #            log to the data first), +:add_to_height+ (extra rows' worth of
  #            height), +:colors+ (R expression used for ColSideColors).
  #            NOTE(review): Misc.process_options presumably removes these keys
  #            from the caller's hash - confirm.
  #
  # Returns +filename+.
  def self.heatmap(values, filename, options = {})
    scale, take_log, add_to_height, colors = Misc.process_options options,
      :scale, :take_log, :add_to_height, :colors

    width  = 200 + (values.fields.length * 16)
    height = 200 + (values.length * 16)
    size   = [[width, height].max, 10000].min

    # The image is square unless extra height was requested.
    plot_height = if add_to_height.nil?
                    size
                  else
                    size + (add_to_height * 16 * [1, (height.to_f / width)].max).to_i
                  end

    # Any other value leaves the argument out, so R's own default applies.
    scale_arg = case scale.to_s
                when "true", 'row'
                  'scale="row",'
                when 'column'
                  'scale="column",'
                when "none", ""
                  'scale="none",'
                end

    heatmap_script = <<-EOF
      #{ take_log ? "data <- log(data)" : ""}
      my.hclust <- function(d){ hclust(d, method="ward") };
      my.hclust <- function(d){ hclust(d) };
      rbbt.png_plot(
        '#{filename}',
        #{ size },
        #{ plot_height },
        'heatmap(as.matrix(data),
        #{ scale_arg }
        #{colors.nil? ? "" : "ColSideColors=#{colors},"}
        hclustfun=my.hclust,
        )',
        pointsize=12, type='cairo', res=150)
      data = NULL;
    EOF

    values.R heatmap_script

    filename
  end

  # Placeholder for a ggplot2 based heatmap: the script currently only loads
  # ggplot2, so nothing is drawn. Accepts the same arguments as +heatmap+ and
  # returns +filename+.
  def self.heatmap2(values, filename, options = {})
    # Kept so the option keys are consumed exactly as in +heatmap+.
    Misc.process_options options, :scale, :take_log, :add_to_height, :colors

    heatmap_script = <<-EOF
      library(ggplot2);

    EOF

    values.R heatmap_script

    filename
  end

end
|
data/lib/rbbt/statistics/fdr.rb
CHANGED
@@ -31,17 +31,33 @@ module FDR
|
|
31
31
|
adjusted.reverse
|
32
32
|
end
|
33
33
|
|
34
|
-
|
35
|
-
|
36
|
-
|
34
|
+
inline do |builder|
|
35
|
+
|
36
|
+
builder.prefix <<-EOC_CODE
|
37
|
+
//{{{ Make compatible with 1.9 and 1.8
|
38
|
+
#ifndef RUBY_19
|
39
|
+
#ifndef RFLOAT_VALUE
|
40
|
+
#define RFLOAT_VALUE(v) (RFLOAT(v)->value)
|
41
|
+
#endif
|
42
|
+
#ifndef RARRAY_PTR
|
43
|
+
#define RARRAY_PTR(v) (RARRAY(v)->ptr)
|
44
|
+
#endif
|
45
|
+
#ifndef RARRAY_LEN
|
46
|
+
#define RARRAY_LEN(v) (RARRAY(v)->len)
|
47
|
+
#endif
|
48
|
+
#endif
|
49
|
+
//}}} Make compatible with 1.9 and 1.8
|
50
|
+
EOC_CODE
|
51
|
+
|
52
|
+
c_code = <<-EOC_CODE
|
37
53
|
double step_up_fast(VALUE ps, double rate){
|
38
54
|
long idx;
|
39
|
-
int total =
|
40
|
-
|
55
|
+
int total = RARRAY_LEN(ps);
|
56
|
+
|
41
57
|
double last_value = 0;
|
42
58
|
for (idx = 0; idx < total; idx++){
|
43
|
-
double p = (double)
|
44
|
-
|
59
|
+
double p = (double) RFLOAT_VALUE(rb_ary_entry(ps, idx));
|
60
|
+
|
45
61
|
if (p > rate * (double) (idx + 1) / (double) total){
|
46
62
|
return last_value;
|
47
63
|
}
|
@@ -51,47 +67,47 @@ module FDR
|
|
51
67
|
return last_value;
|
52
68
|
}
|
53
69
|
|
54
|
-
|
70
|
+
EOC_CODE
|
71
|
+
builder.c_singleton c_code
|
55
72
|
|
56
|
-
|
57
|
-
builder.c <<-EOC
|
58
|
-
|
73
|
+
c_code = <<-EOC_CODE
|
59
74
|
VALUE adjust_fast_self(VALUE ps){
|
60
75
|
long idx;
|
61
|
-
|
62
|
-
int total =
|
76
|
+
|
77
|
+
int total = RARRAY_LEN(ps);
|
63
78
|
|
64
79
|
VALUE new = rb_ary_new();
|
65
80
|
|
66
81
|
double last = 1;
|
67
82
|
for (idx = total - 1; idx >= 0 ; idx-- ){
|
68
|
-
double p = (double)
|
83
|
+
double p = (double) RFLOAT_VALUE(rb_ary_entry(ps, idx));
|
84
|
+
|
69
85
|
|
70
|
-
|
71
86
|
p = p * (double) total / (double) (idx + 1);
|
72
87
|
if (p > last) p = last;
|
73
88
|
last = p;
|
74
89
|
|
75
|
-
|
90
|
+
RFLOAT_VALUE(rb_ary_entry(ps, idx)) = p;
|
76
91
|
}
|
77
92
|
|
78
93
|
return ps;
|
79
94
|
}
|
80
|
-
|
81
|
-
|
82
|
-
|
95
|
+
EOC_CODE
|
96
|
+
builder.c_singleton c_code
|
97
|
+
|
98
|
+
c_code = <<-EOC_CODE
|
83
99
|
VALUE adjust_fast(VALUE ps){
|
84
100
|
long idx;
|
85
|
-
|
86
|
-
int total =
|
101
|
+
|
102
|
+
int total = RARRAY_LEN(ps);
|
87
103
|
|
88
104
|
VALUE new = rb_ary_new();
|
89
105
|
|
90
106
|
double last = 1;
|
91
107
|
for (idx = total - 1; idx >= 0 ; idx-- ){
|
92
|
-
double p = (double)
|
108
|
+
double p = (double) RFLOAT_VALUE(rb_ary_entry(ps, idx));
|
109
|
+
|
93
110
|
|
94
|
-
|
95
111
|
p = p * (double) total / (double) (idx + 1);
|
96
112
|
if (p > last) p = last;
|
97
113
|
last = p;
|
@@ -101,14 +117,17 @@ module FDR
|
|
101
117
|
|
102
118
|
return new;
|
103
119
|
}
|
104
|
-
|
105
|
-
|
120
|
+
EOC_CODE
|
121
|
+
builder.c_singleton c_code
|
122
|
+
|
123
|
+
builder
|
106
124
|
end
|
107
125
|
|
126
|
+
|
108
127
|
# Public, shorter names for the singleton methods compiled from the inline C
# code: adjust, adjust! and step_up.
class << self
  alias_method :adjust, :adjust_fast
  alias_method :adjust!, :adjust_fast_self
  alias_method :step_up, :step_up_fast
end
|
113
132
|
|
114
133
|
# This will change the values of the floats in situ
|
@@ -116,13 +135,22 @@ module FDR
|
|
116
135
|
keys = []
|
117
136
|
values = []
|
118
137
|
|
119
|
-
data.
|
138
|
+
if data.respond_to? :unnamed
|
139
|
+
unnamed = data.unnamed
|
140
|
+
data.unnamed = true
|
141
|
+
end
|
142
|
+
|
143
|
+
data.collect{|key, value| [key, Array === ( v = field.nil? ? value : value[field] ) ? v.first : v] }.sort{|a,b|
|
120
144
|
a[1] <=> b[1]
|
121
145
|
}.each{|p|
|
122
146
|
keys << p[0]
|
123
147
|
values << p[1]
|
124
148
|
}
|
125
149
|
|
150
|
+
if data.respond_to? :unnamed
|
151
|
+
data.unnamed = unnamed
|
152
|
+
end
|
153
|
+
|
126
154
|
FDR.adjust!(values)
|
127
155
|
|
128
156
|
data
|
@@ -130,3 +158,5 @@ module FDR
|
|
130
158
|
|
131
159
|
end
|
132
160
|
|
161
|
+
|
162
|
+
|