rbbt-dm 0.0.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/network/paths.rb +47 -29
- data/lib/rbbt/plots/bar.rb +152 -0
- data/lib/rbbt/plots/heatmap.rb +63 -0
- data/lib/rbbt/statistics/fdr.rb +59 -29
- data/lib/rbbt/statistics/hypergeometric.rb +176 -72
- data/lib/rbbt/statistics/random_walk.rb +285 -42
- data/test/rbbt/network/test_paths.rb +3 -3
- data/test/rbbt/statistics/test_hypergeometric.rb +24 -2
- data/test/rbbt/statistics/test_random_walk.rb +39 -0
- data/test/test_helper.rb +1 -1
- metadata +95 -70
data/lib/rbbt/network/paths.rb
CHANGED
@@ -1,71 +1,87 @@
|
|
1
1
|
require 'priority_queue'
|
2
|
+
|
2
3
|
module Paths
|
3
4
|
|
4
|
-
def self.dijkstra(adjacency, start_node, end_node = nil)
|
5
|
+
# Unweighted shortest-path search over +adjacency+ (every edge costs 1).
#
# adjacency  - maps a node to the collection of its neighbour nodes.
# start_node - node the search starts from; must be a key of +adjacency+.
# end_node   - a String target, a collection of candidate targets, or nil.
# max_steps  - optional limit: a node whose path back to +start_node+
#              (both endpoints included) has more than +max_steps+ elements
#              is not expanded.
#
# Returns nil when +start_node+ is not in +adjacency+ or no target was
# reached. With an +end_node+ it returns the path as produced by
# +extract_path+ (ordered from the target back to +start_node+). Without an
# +end_node+ it returns the node => parent Hash built during the search.
def self.dijkstra(adjacency, start_node, end_node = nil, max_steps = nil)
  return nil unless adjacency.include? start_node

  infinity = 1.0 / 0.0

  active = PriorityQueue.new
  distances = Hash.new { infinity }
  parents = Hash.new

  # NOTE(review): the queue is keyed by node and stores distances; this
  # relies on the PriorityQueue implementation loaded by the file - confirm.
  active[start_node] << 0
  best = infinity
  until active.empty?
    u = active.priorities.first
    distance = active.shift
    distances[u] = distance
    d = distance + 1

    # The path is only needed to enforce max_steps, so skip rebuilding it
    # on every iteration when no limit was requested.
    if max_steps
      path = extract_path(parents, start_node, u)
      next if path.length > max_steps
    end

    adjacency[u].each do |v|
      next unless d < distances[v] and d < best # we can't relax this one

      # end_node may be nil (caller wants the whole parents table); only
      # test for a target hit when one was actually given.
      is_target = end_node && (String === end_node ? end_node == v : end_node.include?(v))
      best = d if is_target

      # Nodes without an adjacency entry are recorded but never expanded.
      active[v] << d if adjacency.include? v
      distances[v] = d
      parents[v] = u
    end
  end

  return parents unless end_node

  # For a collection of targets, keep the first one that was reached.
  end_node = end_node.select{|n| parents.include? n}.first unless String === end_node
  return nil if not parents.include? end_node

  # Build the path from the resolved target, not from the last node that
  # happened to be taken off the queue.
  extract_path(parents, start_node, end_node)
end
|
38
39
|
|
39
|
-
def self.
|
40
|
+
# Walks the +parents+ table backwards from +end_node+ until +start_node+
# is found.
#
# parents    - Hash mapping a node to the node it was reached from.
# start_node - node at which the walk stops (matched with ===).
# end_node   - node the walk begins at.
#
# Returns the visited nodes as an Array ordered from +end_node+ back to
# +start_node+ (both included), or nil when the chain of parents runs out
# before +start_node+ is reached.
def self.extract_path(parents, start_node, end_node)
  path = [end_node]
  until path.last === start_node
    previous = parents[path.last]
    # A missing parent means start_node is unreachable from here; without
    # this guard nil would be appended forever.
    return nil if previous.nil?
    path << previous
  end
  path
end
|
47
|
+
|
48
|
+
def self.weighted_dijkstra(adjacency, start_node, end_node = nil, threshold = nil, max_steps = nil)
|
40
49
|
return nil unless adjacency.include? start_node
|
41
50
|
|
42
51
|
active = PriorityQueue.new
|
43
52
|
distances = Hash.new { 1.0 / 0.0 }
|
44
53
|
parents = Hash.new
|
45
54
|
|
46
|
-
active[start_node]
|
55
|
+
active[start_node] << 0
|
47
56
|
best = 1.0 / 0.0
|
57
|
+
found = false
|
48
58
|
until active.empty?
|
49
|
-
u
|
59
|
+
u = active.priorities.first
|
60
|
+
distance = active.shift
|
50
61
|
distances[u] = distance
|
51
|
-
|
62
|
+
path = extract_path(parents, start_node, u)
|
63
|
+
next if path.length > max_steps if max_steps
|
64
|
+
next if not adjacency.include?(u) or adjacency[u].nil? or adjacency[u].empty?
|
52
65
|
Misc.zip_fields(adjacency[u]).each do |v,node_dist|
|
66
|
+
next if threshold and node_dist > threshold
|
53
67
|
d = distance + node_dist
|
54
68
|
next unless d < distances[v] and d < best # we can't relax this one
|
55
|
-
active[v]
|
69
|
+
active[v] << d
|
70
|
+
distances[v] = d
|
56
71
|
parents[v] = u
|
57
|
-
|
72
|
+
if (String === end_node ? end_node == v : end_node.include?(v))
|
73
|
+
best = d
|
74
|
+
found = true
|
75
|
+
end
|
58
76
|
end
|
59
77
|
end
|
60
78
|
|
79
|
+
return nil unless found
|
80
|
+
|
61
81
|
if end_node
|
62
82
|
end_node = (end_node & parents.keys).first unless String === end_node
|
63
83
|
return nil if not parents.include? end_node
|
64
|
-
|
65
|
-
while not path.last === start_node
|
66
|
-
path << parents[path.last]
|
67
|
-
end
|
68
|
-
path
|
84
|
+
extract_path(parents, start_node, end_node)
|
69
85
|
else
|
70
86
|
parents
|
71
87
|
end
|
@@ -78,16 +94,17 @@ module Paths
|
|
78
94
|
distances = Hash.new { 1.0 / 0.0 }
|
79
95
|
parents = Hash.new
|
80
96
|
|
81
|
-
active[start_node]
|
97
|
+
active[start_node] << 0
|
82
98
|
best = 1.0 / 0.0
|
83
99
|
until active.empty?
|
84
|
-
u
|
100
|
+
u = active.priorities.first
|
101
|
+
distance = active.shift
|
85
102
|
distances[u] = distance
|
86
103
|
next if not adjacency.include?(u) or adjacency[u].nil? or adjacency[u].empty?
|
87
104
|
Misc.zip_fields(adjacency[u]).each do |v,node_dist|
|
88
105
|
d = distance + (node_dist * (l + rand))
|
89
106
|
next unless d < distances[v] and d < best # we can't relax this one
|
90
|
-
active[v]
|
107
|
+
active[v] << distances[v] = d
|
91
108
|
parents[v] = u
|
92
109
|
best = d if (String === end_node ? end_node == v : end_node.include?(v))
|
93
110
|
end
|
@@ -109,14 +126,15 @@ end
|
|
109
126
|
|
110
127
|
module Entity
|
111
128
|
module Adjacent
|
112
|
-
def path_to(adjacency, entities)
|
129
|
+
# Finds a path from this entity to +entities+ through +adjacency+.
#
# When the receiver is an Array the search is run for each element and the
# results are collected. For an adjacency whose +type+ is :flat the search
# is delegated to Paths.dijkstra, using +threshold+ as the step limit when
# +max_steps+ is not given; any other type goes to Paths.weighted_dijkstra.
def path_to(adjacency, entities, threshold = nil, max_steps = nil)
  if Array === self
    return collect{|gene| gene.path_to(adjacency, entities, threshold, max_steps)}
  end

  unless adjacency.type == :flat
    return Paths.weighted_dijkstra(adjacency, self, entities, threshold, max_steps)
  end

  Paths.dijkstra(adjacency, self, entities, max_steps || threshold)
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'png'
|
2
|
+
require 'rbbt/util/misc'
|
3
|
+
|
4
|
+
# Draws positions ("hits") or ranges of positions as vertical lines on a
# PNG canvas, one pixel column per position, scaling positions down when
# the canvas is narrower than the total number of positions.
module BarPlot

  # Colors that can be referred to by name.
  COLORS = {
    :red => PNG::Color::Red,
    :green => PNG::Color::Green,
    :blue => PNG::Color::Blue,
    :white => PNG::Color::White,
    :black => PNG::Color::Black,
    :gray => PNG::Color::Gray,
    :yellow => PNG::Color::Yellow,
  }

  # Widest canvas created by default in +get_canvas+.
  MAX_WIDTH = 2000

  # Resolves +color+ to a PNG::Color: PNG::Color instances are returned
  # untouched, keys of COLORS are looked up, and anything else is handed to
  # PNG::Color.from.
  def self.get_color(color)
    return color if PNG::Color === color
    return COLORS[color] if COLORS.include? color
    PNG::Color.from(color)
  end

  # Maps each value to a color interpolated between :start_color (smallest
  # finite value) and :end_color (largest finite value).
  #
  # values  - Array of numbers responding to nan? and infinite?.
  # options - :start_color (default :green), :end_color (default :red) and
  #           :missing_color (default :blue).
  #
  # Returns an Array with one entry per value. NaN and infinite values get
  # +missing_color+ exactly as it was passed (it is not resolved through
  # +get_color+). When no finite value exists every entry is
  # +missing_color+; when all finite values are equal every entry is the
  # resolved end color.
  def self.gradient_color(values, options = {})
    options = Misc.add_defaults options, :start_color => :green, :end_color => :red, :missing_color => :blue
    start_color, end_color, missing_color = Misc.process_options options, :start_color, :end_color, :missing_color

    start_color = get_color start_color
    end_color = get_color end_color

    finite = values.reject{|v| v.nan? || v.infinite?}
    max = finite.max
    min = finite.min

    return [missing_color] * values.length if max.nil? or min.nil?
    return [end_color] * values.length if max == min

    diff = max - min

    r_start = start_color.r.to_f
    g_start = start_color.g.to_f
    b_start = start_color.b.to_f

    r_end = end_color.r.to_f
    g_end = end_color.g.to_f
    b_end = end_color.b.to_f

    values.collect{|v|
      if v.infinite? or v.nan?
        missing_color
      else
        # Blend weight in 0..1 so that every channel stays between the
        # start and end channel values.
        ratio = (v - min).to_f / diff
        r = (r_start * (1 - ratio) + (r_end * ratio)).to_i
        g = (g_start * (1 - ratio) + (g_end * ratio)).to_i
        b = (b_start * (1 - ratio) + (b_end * ratio)).to_i
        PNG::Color.new(r, g, b)
      end
    }
  end

  # Draws one full-height vertical line per hit.
  #
  # hits   - positions counted from 1.
  # total  - number of positions the full width stands for.
  # canvas - canvas to draw on; it is modified and returned.
  # color  - one color for every hit, or an Array with one color per hit.
  def self.draw_hits_on_canvas(hits, total, canvas, color = PNG::Color::Black)
    width = canvas.width
    height = canvas.height

    # fix hits
    hits = hits.collect{|h| h - 1} # make it start at 0

    # Squeeze positions into the available columns when there are more
    # positions than pixels.
    if width < total
      hits = hits.collect{|h| (h.to_f * width / total).floor}
    end

    if Array === color
      hits.zip(color).each{|hit, hit_color|
        canvas.line hit, 0, hit , height - 1, get_color(hit_color)
      }
    else
      color = get_color color
      hits.each{|hit|
        canvas.line hit, 0, hit , height - 1, color
      }
    end

    canvas
  end

  # Draws every position of each range as a full-height vertical line.
  #
  # ranges - Ranges whose begin is counted from 1.
  # total  - number of positions the full width stands for.
  # canvas - canvas to draw on; it is modified and returned.
  # color  - one color for every range, or an Array with one color per range.
  def self.draw_sections_on_canvas(ranges, total, canvas, color = PNG::Color::Black)
    width = canvas.width
    height = canvas.height

    # fix hits
    # NOTE(review): only the beginning is shifted, so each range covers one
    # more column than its 1-based bounds suggest - confirm this is intended.
    ranges = ranges.collect{|r| (r.begin-1..r.end)} # make it start at 0

    if width < total
      ratio = width.to_f / total
      ranges = ranges.collect{|range| ((range.begin.to_f*ratio).floor..(range.end.to_f*ratio).floor)}
    end

    if Array === color
      ranges.zip(color).each{|range, range_color|
        range.each do |hit|
          canvas.line hit, 0, hit , height - 1, get_color(range_color)
        end
      }
    else
      color = get_color color
      ranges.each{|range|
        range.each do |hit|
          canvas.line hit, 0, hit , height - 1, color
        end
      }
    end

    canvas
  end

  # Returns the canvas to draw on: the one given in :canvas, the image
  # loaded from :filename when :update is set and the file exists, or a new
  # canvas of :width x :height filled with :background.
  #
  # Defaults: width is the smaller of options[:total] and MAX_WIDTH,
  # height is 20 and background is white.
  def self.get_canvas(options = {})
    options = Misc.add_defaults options, :width => [options[:total], MAX_WIDTH].min, :height => 20, :background => PNG::Color::White
    width, height, background, canvas = Misc.process_options options, :width, :height, :background, :canvas

    # File.exist? replaces File.exists?, which newer Rubies no longer provide.
    canvas ||= if options[:update] and options[:filename] and File.exist? options[:filename]
                 PNG.load_file options[:filename]
               else
                 PNG::Canvas.new width, height, get_color(background)
               end
  end

  # Draws +items+ (hits, a single Range, or an Array of Ranges) and delivers
  # the result according to options[:filename]:
  #
  #   :string - returns the PNG data as a blob
  #   nil     - returns the canvas
  #   other   - saves the PNG under that file name
  #
  # options[:color] selects the drawing color (black by default); the
  # remaining options are passed to +get_canvas+.
  def self.draw(items, total, options = {})
    options = options.merge :total => total
    canvas = get_canvas(options)
    items = [items] if Range === items
    return canvas if items.empty? and options[:filename].nil?

    color = options.delete(:color) || PNG::Color::Black
    if Range === items.first
      draw_sections_on_canvas(items, total, canvas, color)
    else
      draw_hits_on_canvas(items, total, canvas, color)
    end

    case options[:filename]
    when :string
      PNG.new(canvas).to_blob
    when nil
      canvas
    else
      PNG.new(canvas).save options[:filename]
    end
  end
end
|
152
|
+
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'rbbt/util/R'
|
2
|
+
|
3
|
+
# Builds R scripts that plot a heatmap of +values+ and runs them through
# values.R.
module Heatmap

  # Plots +values+ as a heatmap into the PNG file +filename+.
  #
  # options:
  #   :scale         - "true"/"row", "column" or "none"/"" select the R
  #                    scale argument; any other value omits it.
  #   :take_log      - when truthy the data is log-transformed first.
  #   :add_to_height - extra rows (16 pixels each, stretched by the
  #                    height/width ratio when above 1) added to the height.
  #   :colors        - R expression used for ColSideColors.
  #
  # Returns +filename+.
  def self.heatmap(values, filename, options = {})
    scale, take_log, add_to_height, colors = Misc.process_options options,
      :scale, :take_log, :add_to_height, :colors

    width = 200 + (values.fields.length * 16)
    height = 200 + (values.length * 16)
    # Square plot sized by the larger dimension, capped at 10000.
    size = [[width, height].max, 10000].min

    log_line = take_log ? "data <- log(data)" : ""

    plot_height = if add_to_height.nil?
                    size
                  else
                    size + (add_to_height * 16 * [1, (height.to_f / width)].max).to_i
                  end

    scale_arg = case scale.to_s
                when "true", 'row'
                  'scale="row",'
                when 'column'
                  'scale="column",'
                when "none", ""
                  'scale="none",'
                end

    colors_arg = colors.nil? ? "" : "ColSideColors=#{colors},"

    heatmap_script = <<-EOF
#{log_line}
my.hclust <- function(d){ hclust(d, method="ward") };
my.hclust <- function(d){ hclust(d) };
rbbt.png_plot(
'#{filename}',
#{size},
#{plot_height},
'heatmap(as.matrix(data),
#{scale_arg}
#{colors_arg}
hclustfun=my.hclust,
)',
pointsize=12, type='cairo', res=150)
data = NULL;
    EOF

    values.R heatmap_script

    filename
  end

  # Runs an R script that only loads ggplot2 and returns +filename+; the
  # options are read and the sizes computed but nothing is plotted yet.
  def self.heatmap2(values, filename, options = {})
    scale, take_log, add_to_height, colors = Misc.process_options options,
      :scale, :take_log, :add_to_height, :colors

    width = 200 + (values.fields.length * 16)
    height = 200 + (values.length * 16)
    size = [[width, height].max, 20000].min

    heatmap_script = <<-EOF
library(ggplot2);

    EOF

    values.R heatmap_script

    filename
  end

end
|
data/lib/rbbt/statistics/fdr.rb
CHANGED
@@ -31,17 +31,33 @@ module FDR
|
|
31
31
|
adjusted.reverse
|
32
32
|
end
|
33
33
|
|
34
|
-
|
35
|
-
|
36
|
-
|
34
|
+
inline do |builder|
|
35
|
+
|
36
|
+
builder.prefix <<-EOC_CODE
|
37
|
+
//{{{ Make compatible with 1.9 and 1.8
|
38
|
+
#ifndef RUBY_19
|
39
|
+
#ifndef RFLOAT_VALUE
|
40
|
+
#define RFLOAT_VALUE(v) (RFLOAT(v)->value)
|
41
|
+
#endif
|
42
|
+
#ifndef RARRAY_PTR
|
43
|
+
#define RARRAY_PTR(v) (RARRAY(v)->ptr)
|
44
|
+
#endif
|
45
|
+
#ifndef RARRAY_LEN
|
46
|
+
#define RARRAY_LEN(v) (RARRAY(v)->len)
|
47
|
+
#endif
|
48
|
+
#endif
|
49
|
+
//}}} Make compatible with 1.9 and 1.8
|
50
|
+
EOC_CODE
|
51
|
+
|
52
|
+
c_code = <<-EOC_CODE
|
37
53
|
double step_up_fast(VALUE ps, double rate){
|
38
54
|
long idx;
|
39
|
-
int total =
|
40
|
-
|
55
|
+
int total = RARRAY_LEN(ps);
|
56
|
+
|
41
57
|
double last_value = 0;
|
42
58
|
for (idx = 0; idx < total; idx++){
|
43
|
-
double p = (double)
|
44
|
-
|
59
|
+
double p = (double) RFLOAT_VALUE(rb_ary_entry(ps, idx));
|
60
|
+
|
45
61
|
if (p > rate * (double) (idx + 1) / (double) total){
|
46
62
|
return last_value;
|
47
63
|
}
|
@@ -51,47 +67,47 @@ module FDR
|
|
51
67
|
return last_value;
|
52
68
|
}
|
53
69
|
|
54
|
-
|
70
|
+
EOC_CODE
|
71
|
+
builder.c_singleton c_code
|
55
72
|
|
56
|
-
|
57
|
-
builder.c <<-EOC
|
58
|
-
|
73
|
+
c_code = <<-EOC_CODE
|
59
74
|
VALUE adjust_fast_self(VALUE ps){
|
60
75
|
long idx;
|
61
|
-
|
62
|
-
int total =
|
76
|
+
|
77
|
+
int total = RARRAY_LEN(ps);
|
63
78
|
|
64
79
|
VALUE new = rb_ary_new();
|
65
80
|
|
66
81
|
double last = 1;
|
67
82
|
for (idx = total - 1; idx >= 0 ; idx-- ){
|
68
|
-
double p = (double)
|
83
|
+
double p = (double) RFLOAT_VALUE(rb_ary_entry(ps, idx));
|
84
|
+
|
69
85
|
|
70
|
-
|
71
86
|
p = p * (double) total / (double) (idx + 1);
|
72
87
|
if (p > last) p = last;
|
73
88
|
last = p;
|
74
89
|
|
75
|
-
|
90
|
+
RFLOAT_VALUE(rb_ary_entry(ps, idx)) = p;
|
76
91
|
}
|
77
92
|
|
78
93
|
return ps;
|
79
94
|
}
|
80
|
-
|
81
|
-
|
82
|
-
|
95
|
+
EOC_CODE
|
96
|
+
builder.c_singleton c_code
|
97
|
+
|
98
|
+
c_code = <<-EOC_CODE
|
83
99
|
VALUE adjust_fast(VALUE ps){
|
84
100
|
long idx;
|
85
|
-
|
86
|
-
int total =
|
101
|
+
|
102
|
+
int total = RARRAY_LEN(ps);
|
87
103
|
|
88
104
|
VALUE new = rb_ary_new();
|
89
105
|
|
90
106
|
double last = 1;
|
91
107
|
for (idx = total - 1; idx >= 0 ; idx-- ){
|
92
|
-
double p = (double)
|
108
|
+
double p = (double) RFLOAT_VALUE(rb_ary_entry(ps, idx));
|
109
|
+
|
93
110
|
|
94
|
-
|
95
111
|
p = p * (double) total / (double) (idx + 1);
|
96
112
|
if (p > last) p = last;
|
97
113
|
last = p;
|
@@ -101,14 +117,17 @@ module FDR
|
|
101
117
|
|
102
118
|
return new;
|
103
119
|
}
|
104
|
-
|
105
|
-
|
120
|
+
EOC_CODE
|
121
|
+
builder.c_singleton c_code
|
122
|
+
|
123
|
+
builder
|
106
124
|
end
|
107
125
|
|
126
|
+
|
108
127
|
# Short public names for the module-level methods ending in _fast / _fast_self.
class << self
  alias_method :adjust, :adjust_fast
  alias_method :adjust!, :adjust_fast_self
  alias_method :step_up, :step_up_fast
end
|
113
132
|
|
114
133
|
# This will change the values of the floats in situ
|
@@ -116,13 +135,22 @@ module FDR
|
|
116
135
|
keys = []
|
117
136
|
values = []
|
118
137
|
|
119
|
-
data.
|
138
|
+
if data.respond_to? :unnamed
|
139
|
+
unnamed = data.unnamed
|
140
|
+
data.unnamed = true
|
141
|
+
end
|
142
|
+
|
143
|
+
data.collect{|key, value| [key, Array === ( v = field.nil? ? value : value[field] ) ? v.first : v] }.sort{|a,b|
|
120
144
|
a[1] <=> b[1]
|
121
145
|
}.each{|p|
|
122
146
|
keys << p[0]
|
123
147
|
values << p[1]
|
124
148
|
}
|
125
149
|
|
150
|
+
if data.respond_to? :unnamed
|
151
|
+
data.unnamed = unnamed
|
152
|
+
end
|
153
|
+
|
126
154
|
FDR.adjust!(values)
|
127
155
|
|
128
156
|
data
|
@@ -130,3 +158,5 @@ module FDR
|
|
130
158
|
|
131
159
|
end
|
132
160
|
|
161
|
+
|
162
|
+
|