rbbt-dm 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/network/paths.rb +136 -0
- data/lib/rbbt/statistics/hypergeometric.rb +47 -25
- data/lib/rbbt/statistics/random_walk.rb +158 -0
- data/lib/rbbt/vector/model/svm.rb +20 -0
- data/lib/rbbt/vector/model.rb +122 -0
- data/test/rbbt/network/test_paths.rb +68 -0
- data/test/rbbt/statistics/test_hypergeometric.rb +3 -4
- data/test/rbbt/vector/model/test_svm.rb +43 -0
- data/test/rbbt/vector/test_model.rb +74 -0
- metadata +44 -6
@@ -0,0 +1,136 @@
|
|
1
|
+
require 'priority_queue'
|
2
|
+
# Shortest-path searches (Dijkstra) over adjacency structures.
#
# An adjacency is any object answering +include?(node)+ and +[](node)+
# (a Hash or a TSV). All searches share one implementation and differ only
# in how the value stored for a node is turned into [neighbour, cost] pairs.
module Paths

  # Unweighted search: every edge costs 1.
  #
  # adjacency:: maps each node to a list of neighbour nodes.
  # start_node:: node the search starts from.
  # end_node:: a String (single target), a collection answering +include?+
  #            and +&+ (several targets, e.g. Array or Set), or nil.
  #
  # Returns nil when +start_node+ is not in the adjacency or no target was
  # reached; the path as an Array ordered from the reached target back to
  # +start_node+ when a target was given; the node => parent Hash of the
  # whole search when +end_node+ is nil.
  def self.dijkstra(adjacency, start_node, end_node = nil)
    shortest_paths(adjacency, start_node, end_node) do |neighbours|
      neighbours.collect { |node| [node, 1] }
    end
  end

  # Weighted search. The value stored for a node is passed through
  # Misc.zip_fields and each resulting pair is read as [neighbour, cost].
  # Arguments and return value are as in +dijkstra+.
  def self.weighted_dijkstra(adjacency, start_node, end_node = nil)
    shortest_paths(adjacency, start_node, end_node) do |neighbours|
      Misc.zip_fields(neighbours)
    end
  end

  # Weighted search where each edge cost is multiplied by (l + rand), with
  # a fresh random number drawn for every edge examined. Arguments and
  # return value are otherwise as in +weighted_dijkstra+.
  def self.random_weighted_dijkstra(adjacency, l, start_node, end_node = nil)
    shortest_paths(adjacency, start_node, end_node) do |neighbours|
      Misc.zip_fields(neighbours).collect { |node, node_dist| [node, node_dist * (l + rand)] }
    end
  end

  # Shared search loop. The block receives the adjacency value of the node
  # being expanded and must return a list of [neighbour, cost] pairs.
  def self.shortest_paths(adjacency, start_node, end_node)
    return nil unless adjacency.include? start_node

    active    = PriorityQueue.new
    distances = Hash.new { 1.0 / 0.0 }
    parents   = {}
    best      = 1.0 / 0.0 # cost of the cheapest route to a target seen so far

    active[start_node] = 0
    until active.empty?
      u, distance = active.delete_min
      distances[u] = distance

      # Nodes that only appear as neighbours have nothing to expand
      neighbours = adjacency.include?(u) ? adjacency[u] : nil
      next if neighbours.nil? || neighbours.empty?

      yield(neighbours).each do |v, cost|
        d = distance + cost
        next unless d < distances[v] && d < best # we can't relax this one
        active[v] = distances[v] = d
        parents[v] = u
        best = d if target?(end_node, v)
      end
    end

    return parents unless end_node

    # For a collection of targets pick one that was actually reached
    end_node = (end_node & parents.keys).first unless String === end_node
    return nil unless parents.include? end_node

    path = [end_node]
    path << parents[path.last] until path.last === start_node
    path
  end

  # True when +node+ is the target, or one of the targets. Always false when
  # no target was requested, so that a full search can run to completion.
  def self.target?(end_node, node)
    return false unless end_node
    String === end_node ? end_node == node : end_node.include?(node)
  end

  private_class_method :shortest_paths, :target?
end
|
109
|
+
|
110
|
+
module Entity
  # Path-finding helpers for entities (or arrays of entities) that are
  # nodes of an adjacency structure.
  module Adjacent

    # Path from this entity to +entities+ over +adjacency+, as computed by
    # Paths.dijkstra when the adjacency type is :flat and by
    # Paths.weighted_dijkstra otherwise. For an Array, one result per element.
    def path_to(adjacency, entities)
      return collect { |gene| gene.path_to(adjacency, entities) } if Array === self

      if adjacency.type == :flat
        Paths.dijkstra(adjacency, self, entities)
      else
        Paths.weighted_dijkstra(adjacency, self, entities)
      end
    end

    # Runs Paths.random_weighted_dijkstra +times+ times from this entity and
    # returns the list of results. For an Array, the lists of all elements
    # are concatenated.
    def random_paths_to(adjacency, l, times, entities)
      if Array === self
        inject([]) { |all, gene| all + gene.random_paths_to(adjacency, l, times, entities) }
      else
        times.times.collect { Paths.random_weighted_dijkstra(adjacency, l, self, entities) }
      end
    end
  end
end
|
@@ -1,5 +1,8 @@
|
|
1
1
|
require 'inline'
|
2
|
-
require 'rbbt/
|
2
|
+
require 'rbbt/tsv'
|
3
|
+
require 'rbbt/persist'
|
4
|
+
require 'rbbt/statistics/fdr'
|
5
|
+
require 'rbbt/entity'
|
3
6
|
|
4
7
|
module Hypergeometric
|
5
8
|
class << self
|
@@ -88,49 +91,47 @@ double hypergeometric(double total, double support, double list, double found)
|
|
88
91
|
end
|
89
92
|
end
|
90
93
|
|
91
|
-
|
94
|
+
module TSV
|
92
95
|
|
93
|
-
def annotation_counts(fields = nil)
|
96
|
+
def annotation_counts(fields = nil, persistence = false)
|
94
97
|
fields ||= self.fields
|
95
98
|
fields = [fields] if String === fields or Symbol === fields
|
96
99
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
TCHash.get(annotation_count_cache_file)
|
102
|
-
else
|
103
|
-
Log.low "Saving annotation counts to #{ annotation_count_cache_file }"
|
104
|
-
hash = TCHash.get(annotation_count_cache_file)
|
105
|
-
|
106
|
-
counts = Hash.new(0)
|
107
|
-
through :main, fields do |key, values|
|
108
|
-
values.flatten.compact.uniq.each{|value| counts[value] += 1}
|
100
|
+
Persist.persist(filename, :marshal, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
|
101
|
+
data ||= Hash.new(0)
|
102
|
+
through :key, fields do |key, values|
|
103
|
+
values.flatten.compact.uniq.each{|value| data[value] += 1}
|
109
104
|
end
|
110
|
-
|
105
|
+
|
106
|
+
data
|
111
107
|
end
|
112
108
|
end
|
113
109
|
|
114
|
-
def enrichment(list, fields, options = {})
|
115
|
-
|
110
|
+
def enrichment(list, fields = nil, options = {})
|
111
|
+
fields ||= self.fields.first
|
112
|
+
options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false
|
116
113
|
Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
|
117
|
-
|
118
|
-
|
114
|
+
|
115
|
+
selected = select :key => list
|
116
|
+
|
119
117
|
tsv_size = keys.length
|
120
118
|
total = selected.keys.length
|
121
119
|
Log.debug "Found #{total} of #{list.length} entities"
|
122
120
|
|
123
|
-
counts = annotation_counts fields
|
121
|
+
counts = annotation_counts fields, options[:persist]
|
124
122
|
|
125
123
|
annotations = Hash.new 0
|
126
|
-
|
127
|
-
|
124
|
+
with_unnamed do
|
125
|
+
selected.through :key, fields do |key, values|
|
126
|
+
values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
|
127
|
+
annotations[value] += 1
|
128
|
+
}
|
129
|
+
end
|
128
130
|
end
|
129
131
|
|
130
132
|
pvalues = {}
|
131
133
|
annotations.each do |annotation, count|
|
132
|
-
|
133
|
-
next if count < options[:min_support]
|
134
|
+
next if count < options[:min_support] or not counts.include? annotation
|
134
135
|
pvalue = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
|
135
136
|
pvalues[annotation] = pvalue
|
136
137
|
end
|
@@ -138,9 +139,30 @@ class TSV
|
|
138
139
|
FDR.adjust_hash! pvalues if options[:fdr]
|
139
140
|
pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
|
140
141
|
|
142
|
+
TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
|
143
|
+
|
141
144
|
pvalues
|
142
145
|
end
|
146
|
+
|
147
|
+
# Enrichment analysis of the keys of this TSV against +field+ of another
# +tsv+. A Path argument is first opened through its +tsv+ method. The keys
# are translated with the index returned by TSV.find_traversal and the
# result is handed to +tsv.enrichment+ together with +field+ and +options+.
# Raises when no index is returned.
def enrichment_for(tsv, field, options = {} )
  target = Path === tsv ? tsv.tsv : tsv

  index = TSV.find_traversal(self, target, :in_namespace => false, :persist_input => true)
  raise "Cannot traverse identifiers" if index.nil?

  translations = index.values_at(*self.keys)
  source_keys = translations.flatten.compact.uniq

  target.enrichment source_keys, field, options
end
|
143
157
|
end
|
144
158
|
|
159
|
+
module Entity
  # Lets a list of entities run an enrichment analysis against a TSV.
  module Enriched

    # Delegates to +file.enrichment+ using this object as the entity list.
    # A Path is first opened through its +tsv+ method; +fields+ and
    # +options+ are passed through unchanged.
    def enrichment(file, fields = nil, options = {})
      tsv = Path === file ? file.tsv : file
      tsv.enrichment(self, fields, options)
    end
  end
end
|
145
167
|
|
146
168
|
|
@@ -0,0 +1,158 @@
|
|
1
|
+
require 'png'
|
2
|
+
require 'inline'
|
3
|
+
|
4
|
+
# Rank-based enrichment scores, permutation p-values and a simple PNG
# rendering of hit positions.
module RandomWalk

  class << self
    inline do |builder|

      # C helper: weight of a list position relative to the middle of the
      # list (+mean+); grows with the relative distance from it.
      builder.c_raw <<-'EOC'
        double weight(int position, int mean){
          double rel_pos = (double) abs(position - mean) / mean;
          double weight =  0.3 * 0.5 * rel_pos +  0.7 * (exp(30*rel_pos)/exp(30));
          return(weight);
        }
      EOC

      # Score of a list of positions (Ruby Array of Fixnum, read in order)
      # within a ranking of +total+ elements; +missing+ adds a weight
      # penalty. Returns the largest positive deviation, or the largest
      # negative deviation with a minus sign.
      #
      # RARRAY_LEN / NUM2DBL are used instead of reaching into the RArray
      # and RFloat structs, whose layout changed in Ruby 1.9.
      builder.c <<-'EOC'
        double fast_score_scale(VALUE positions, int total, int missing){
          int idx;

          int mean = total / 2;

          VALUE rel_q = rb_ary_new();
          VALUE rel_l = rb_ary_new();

          rb_ary_push(rel_q,rb_float_new(0));

          // Rescale positions and accumulate weights
          double total_weights = 0;
          for (idx = 0; idx < RARRAY_LEN(positions); idx++){
            int position = FIX2INT(rb_ary_entry(positions, idx));

            rb_ary_push(rel_l, rb_float_new((double) position / total));

            total_weights += weight(position, mean);
            rb_ary_push(rel_q, rb_float_new(total_weights));
          }

          // Add penalty for missing genes
          double penalty = missing * weight(mean * 0.8, mean);
          total_weights  = total_weights + penalty;

          // Traverse list and get extreme values
          double max_top, max_bottom;
          max_top = max_bottom = 0;
          for (idx = 0; idx < RARRAY_LEN(positions); idx++){
            double top    = NUM2DBL(rb_ary_entry(rel_q, idx + 1)) / total_weights -
                            NUM2DBL(rb_ary_entry(rel_l, idx));
            double bottom = - (penalty + NUM2DBL(rb_ary_entry(rel_q, idx))) / total_weights +
                            NUM2DBL(rb_ary_entry(rel_l, idx));

            if (top > max_top)       max_top    = top;
            if (bottom > max_bottom) max_bottom = bottom;
          }

          if (max_top > max_bottom) return max_top;
          else                      return -max_bottom;
        }
      EOC

    end
  end

  class << self
    alias score fast_score_scale
  end

  # Combines the scores of an "up" and a "down" list: the other score when
  # one of them is 0, otherwise +up - down+.
  #
  # NOTE(review): a branch returning 0 when both scores share a sign used to
  # follow an unconditional return and never ran; it has been dropped
  # without changing results. Confirm whether it was meant to be active.
  def self.combine(up, down)
    return down if up == 0
    return up if down == 0

    up - down
  end

  # Scores the +up+ and +down+ position lists separately and combines them.
  def self.score_up_down(up, down, total, missing = 0)
    scores_up   = score(up, total, missing)
    scores_down = score(down, total, missing)

    combine(scores_up, scores_down)
  end

  # Two sided
  #
  # Absolute scores of +times+ random sorted lists of +size+ positions drawn
  # from 0...total. A +size+ of 0 yields +times+ zeros.
  def self.permutations(size, total, missing = 0, times = 10000)
    if size == 0
      [0] * times
    else
      (1..times).collect do
        score(Array.new(size){ (rand * total).to_i }.sort, total, missing).abs
      end
    end
  end

  # Like +permutations+, for paired random up/down lists scored with
  # +score_up_down+.
  def self.permutations_up_down(size_up, size_down, total, missing = 0, times = 10000)
    (1..times).collect do
      score_up_down(Array.new(size_up){ (rand * total).to_i }.sort, Array.new(size_down){ (rand * total).to_i }.sort, total, missing).abs
    end
  end

  # Fraction of +permutations+ strictly greater than the absolute value of
  # +score+.
  def self.pvalue(permutations, score)
    score = score.abs
    permutations.count { |per| per > score }.to_f / permutations.length
  end

  # Colour names accepted by +draw_hits+.
  COLORS = {
    :red =>   PNG::Color::Red,
    :green => PNG::Color::Green,
    :white => PNG::Color::White,
    :black => PNG::Color::Black,

  }

  # Draws one black vertical line per hit on a canvas.
  #
  # hits:: positions; each is shifted down by one before drawing
  #        (presumably 1-based ranks - confirm against callers).
  # total:: length of the ranking.
  # filename:: where to save the PNG; when nil the image blob is returned.
  # options:: :size (canvas length, capped at +total+; hits are rescaled
  #           when smaller), :bg_color (key of COLORS, default :white),
  #           :width (default 20), :sections (color => [start, finish]
  #           column ranges painted before the hits).
  def self.draw_hits(hits, total, filename = nil, options = {})

    size = options[:size] || total
    bg_color = options[:bg_color] || :white
    width = options[:width] || 20
    sections = options[:sections] || []

    size = [size, total].min

    hits = hits.collect{|h| h - 1}
    if size < total
      hits = hits.collect{|h| (h.to_f * size / total).to_i}
    end

    canvas = PNG::Canvas.new size, width, COLORS[bg_color]

    sections.each{|color, info|
      start = info[0]
      finish = info[1]
      (start..finish).each{|x|
        (0..width - 1).each{|y|
          canvas[x,y] = COLORS[color]
        }
      }
    }

    hits.each{|hit|
      canvas.line hit, 0, hit , width - 1, PNG::Color::Black
    }

    png = PNG.new canvas

    if filename
      png.save filename
    else
      png.to_blob
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'rbbt/vector/model'
|
2
|
+
# VectorModel preconfigured with R scripts that train and evaluate an SVM
# through the e1071 library.
class SVMModel < VectorModel

  # Sets up the model in +dir+ with identity feature extraction and the
  # e1071 training and prediction scripts.
  def initialize(dir)
    super(dir)

    # Elements are used as feature vectors unchanged
    @extract_features = Proc.new { |element| element }

    # R code run on the +features+ data frame; must define +model+
    @train_model = <<-EOF
      library(e1071);
      model = svm(class ~ ., data = features);
    EOF

    # R code run with +model+ loaded; must define +label+
    @eval_model = <<-EOF
      library(e1071);
      label = predict(model, features);
    EOF
  end
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
require 'rbbt/util/R'
|
2
|
+
|
3
|
+
# A trainable model over feature vectors, persisted in a directory.
#
# Feature extraction is a Proc. Training and evaluation are each either a
# Proc or a String of R code that is run through +R_train+ / +R_eval+.
class VectorModel
  attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
  attr_accessor :features, :labels

  # Trains a model in R and saves it to +model_file+.
  #
  # features:: list of feature vectors, written one per line, tab separated.
  # labels:: one label per feature vector, read back in R with +scan+.
  # code:: R code that must define +model+ from the +features+ data frame,
  #        which carries the labels in its +class+ column.
  def self.R_train(model_file, features, labels, code)
    TmpFile.with_file do |feature_file|
      Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
      Open.write(feature_file + '.class', labels * "\n")

      R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
labels = scan("#{ feature_file }.class");
features = cbind(features, class = labels);
#{code}
save(model, file='#{model_file}')
      EOF
    end
  end

  # Evaluates the model saved in +model_file+ in R.
  #
  # features:: a list of feature vectors when +list+ is true, otherwise a
  #            single feature vector.
  # code:: R code that must define +label+ from +model+ and +features+.
  #
  # Returns the predictions as Floats: an Array when +list+ is true,
  # otherwise the first value only.
  def self.R_eval(model_file, features, list, code)
    TmpFile.with_file do |feature_file|
      rows = list ? features : [features]
      Open.write(feature_file, rows.collect{|feat| feat * "\t"} * "\n" + "\n")

      io = R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
load(file="#{model_file}");
#{code}
cat(paste(label, sep="\\n"));
      EOF

      # Drop the first WARNING line of the output, if any, before parsing
      res = io.read.sub(/WARNING: .*?\n/,'').split(/\s+/).collect{|l| l.to_f}

      list ? res : res.first
    end
  end

  # Creates the model directory if needed and stores the feature
  # extraction, training and evaluation handlers (each may be nil and be
  # assigned later through the accessors).
  def initialize(directory, extract_features = nil, train_model = nil, eval_model = nil)
    @directory = directory
    FileUtils.mkdir_p @directory unless File.exist? @directory
    @model_file = File.join(@directory, "model")
    @extract_features = extract_features
    @train_model = train_model
    @eval_model = eval_model
    @features = []
    @labels = []
  end

  # Adds the features extracted from +element+ to the training set, and
  # +label+ to the labels unless it is nil.
  def add(element, label = nil)
    @features << extract_features.call(element)
    @labels << label unless label.nil?
  end

  # Trains on the accumulated features and labels. A Proc +train_model+ is
  # called with (model_file, features, labels); a String is run as R code
  # through the class-level +R_train+. Returns nil for anything else.
  def train
    case train_model
    when Proc
      train_model.call(@model_file, @features, @labels)
    when String
      self.class.R_train(@model_file, @features, @labels, train_model)
    end
  end

  # Evaluates a single +element+. A Proc +eval_model+ is called with
  # (model_file, features, false); a String is run through +R_eval+.
  def eval(element)
    case eval_model
    when Proc
      eval_model.call(@model_file, extract_features.call(element), false)
    when String
      self.class.R_eval(@model_file, extract_features.call(element), false, eval_model)
    end
  end

  # Evaluates several +elements+ at once. Features are extracted from each
  # element unless +extract+ is false, in which case +elements+ are taken to
  # be feature vectors already.
  def eval_list(elements, extract = true)
    vectors = extract ? elements.collect{|element| extract_features.call(element)} : elements

    case eval_model
    when Proc
      eval_model.call(@model_file, vectors, true)
    when String
      self.class.R_eval(@model_file, vectors, true, eval_model)
    end
  end

  # Repeated random hold-out validation. In each of +folds+ rounds the
  # examples are shuffled, the first features.length / folds of them are
  # held out, the model is trained on the rest and evaluated on the held-out
  # ones.
  #
  # Returns one accuracy per round: the fraction of held-out examples whose
  # prediction is within 0.5 of its label (both converted with +to_f+).
  # The stored features and labels are restored after every round, also when
  # training or evaluation raises.
  #
  # Raises ArgumentError when there are fewer examples than folds.
  def cross_validation(folds = 10)
    saved_features = @features
    saved_labels = @labels

    chunk_size = saved_features.length / folds
    raise ArgumentError, "Not enough examples (#{saved_features.length}) for #{folds} folds" if chunk_size < 1

    seq = (0..saved_features.length - 1).to_a

    (1..folds).collect do
      seq = seq.shuffle
      eval_chunk = seq[0...chunk_size] # exclusive range: no overlap with the training chunk
      train_chunk = seq[chunk_size..-1]

      eval_features = saved_features.values_at(*eval_chunk)
      eval_labels = saved_labels.values_at(*eval_chunk)

      begin
        @features = saved_features.values_at(*train_chunk)
        @labels = saved_labels.values_at(*train_chunk)

        train
        predictions = eval_list eval_features, false
      ensure
        @features = saved_features
        @labels = saved_labels
      end

      hits = predictions.zip(eval_labels).select{|pred,lab| (pred.to_f - lab.to_f).abs < 0.5}.length
      hits.to_f / eval_chunk.length
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'rbbt/network/paths'
|
3
|
+
require 'test/unit'
|
4
|
+
require 'rbbt/sources/string'
|
5
|
+
require 'set'
|
6
|
+
|
7
|
+
|
8
|
+
# Path searches over the STRING protein-protein interaction network.
class TestNetwork < Test::Unit::TestCase

  # Unweighted search over a flat adjacency, target given as a list
  def test_dijsktra
    network = STRING.protein_protein.tsv :persist => false, :fields => ["Interactor Ensembl Protein ID"], :type => :flat
    network.unnamed = true

    source = "ENSP00000256078"
    target = "ENSP00000306245"

    assert_connects Paths.dijkstra(network, source, [target]), source, target
  end

  # Weighted search, target given as a single node and as a Set
  def test_weighted_dijsktra
    network = weighted_network

    source = "ENSP00000256078"
    target = "ENSP00000306245"

    assert_connects Paths.weighted_dijkstra(network, source, target), source, target
    assert_connects Paths.weighted_dijkstra(network, source, Set.new([target])), source, target
  end

  # Weighted search with randomly perturbed edge costs
  def test_random_weighted_dijsktra
    network = weighted_network

    source = "ENSP00000256078"
    target = "ENSP00000306245"

    assert_connects Paths.random_weighted_dijkstra(network, 0.8, source, target), source, target
  end

  private

  # STRING network with each score replaced by 1000 - score
  def weighted_network
    network = STRING.protein_protein.tsv

    network.process "Score" do |scores|
      scores.collect{|score| 1000 - score.to_i}
    end
    network.unnamed = true

    network
  end

  # A path was found and it contains both end points
  def assert_connects(path, source, target)
    assert path != nil
    assert path.include? source
    assert path.include? target
  end
end
|
67
|
+
|
68
|
+
|
@@ -17,7 +17,7 @@ row3 a C Id4
|
|
17
17
|
EOF
|
18
18
|
|
19
19
|
TmpFile.with_file(content) do |filename|
|
20
|
-
tsv = TSV.
|
20
|
+
tsv = TSV.open(filename, :sep => /\s+/)
|
21
21
|
counts = tsv.annotation_counts
|
22
22
|
assert_equal 2, counts['a']
|
23
23
|
end
|
@@ -36,10 +36,9 @@ row7 A B Id3
|
|
36
36
|
EOF
|
37
37
|
|
38
38
|
TmpFile.with_file(content) do |filename|
|
39
|
-
tsv = TSV.
|
39
|
+
tsv = TSV.open(filename, :sep => /\s+/)
|
40
40
|
|
41
|
-
assert_equal %w(a), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA").collect{|annot,pvalue| pvalue < 0.05 ? annot : nil}.compact
|
42
|
-
assert_equal %w(aa aaa), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA").collect{|annot,pvalue| pvalue > 0.05 ? annot : nil}.compact
|
41
|
+
assert_equal %w(a), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA", :fdr => false).collect{|annot,pvalue| pvalue < 0.05 ? annot : nil}.compact
|
43
42
|
end
|
44
43
|
end
|
45
44
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../../test_helper')
|
2
|
+
require 'rbbt/vector/model/svm'
|
3
|
+
require 'rbbt/util/R'
|
4
|
+
require 'test/unit'
|
5
|
+
|
6
|
+
# Trains an SVMModel on a toy data set and checks its predictions.
class TestSVMModel < Test::Unit::TestCase

  def test_model
    # One example per line: "<label> <f1>;<f2>;<f3>"
    examples =<<-EOF
      1 0;1;1
      1 1;0;1
      1 1;1;1
      1 0;1;1
      1 1;1;1
      0 0;1;0
      0 1;0;0
      0 0;1;0
      0 1;0;0
    EOF

    TmpFile.with_file() do |dir|
      FileUtils.mkdir_p dir
      model = SVMModel.new(dir)

      model.extract_features = Proc.new { |element| element.split(";") }

      examples.each_line do |line|
        label, features = line.split(" ")
        model.add(features, label)
      end

      model.train

      assert model.eval("1;1;1") > 0.5
      assert model.eval("0;0;0") < 0.5

      predictions = model.eval_list(%w(1;1;1 0;0;0))
      assert_equal [true, false], predictions.collect{|v| v > 0.5}
    end
  end

end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'rbbt/vector/model'
|
3
|
+
require 'rbbt/util/R'
|
4
|
+
require 'test/unit'
|
5
|
+
|
6
|
+
# Trains a VectorModel with hand-written R training and evaluation procs.
class TestVectorModel < Test::Unit::TestCase

  def test_model
    # One example per line: "<label> <f1>;<f2>;<f3>"
    examples =<<-EOF
      1 0;1;1
      1 1;0;1
      1 1;1;1
      1 0;1;1
      1 1;1;1
      0 0;1;0
      0 1;0;0
      0 0;1;0
      0 1;0;0
    EOF

    TmpFile.with_file() do |dir|
      FileUtils.mkdir_p dir
      model = VectorModel.new(dir)

      model.extract_features = Proc.new { |element| element.split(";") }

      # Writes features and labels to files and fits an SVM in R
      model.train_model = Proc.new do |model_file,features,labels|
        TmpFile.with_file do |feature_file|
          Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
          Open.write(feature_file + '.class', labels * "\n")
          R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
labels = scan("#{ feature_file }.class", what=numeric());
features = cbind(features, class = labels);
library(e1071)
model = svm(class ~ ., data = features)
save(model, file="#{ model_file }");
          EOF
        end
      end

      # Predicts in R, writing the label to a results file that is read back
      model.eval_model = Proc.new do |model_file,features|
        TmpFile.with_file do |feature_file|
          TmpFile.with_file do |results|
            Open.write(feature_file, features * "\t")
            script = <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
library(e1071)
load(file="#{ model_file }")
label = predict(model, features);
cat(label, file="#{results}");
            EOF
            puts R.run(script).read
            Open.read(results)
          end
        end
      end

      examples.each_line do |line|
        label, features = line.split(" ")
        model.add(features, label)
      end

      model.train

      assert model.eval("1;1;1").to_f > 0.5
      assert model.eval("0;0;0").to_f < 0.5
    end
  end

end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2011-10-04 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -32,6 +32,34 @@ dependencies:
|
|
32
32
|
version: "0"
|
33
33
|
type: :runtime
|
34
34
|
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: rbbt-entities
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :runtime
|
48
|
+
version_requirements: *id002
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: RubyInline
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
type: :runtime
|
62
|
+
version_requirements: *id003
|
35
63
|
description: Data-mining and statistics
|
36
64
|
email: miguel.vazquez@fdi.ucm.es
|
37
65
|
executables: []
|
@@ -42,10 +70,17 @@ extra_rdoc_files:
|
|
42
70
|
- LICENSE
|
43
71
|
files:
|
44
72
|
- LICENSE
|
73
|
+
- lib/rbbt/network/paths.rb
|
45
74
|
- lib/rbbt/statistics/fdr.rb
|
46
75
|
- lib/rbbt/statistics/hypergeometric.rb
|
76
|
+
- lib/rbbt/statistics/random_walk.rb
|
77
|
+
- lib/rbbt/vector/model.rb
|
78
|
+
- lib/rbbt/vector/model/svm.rb
|
47
79
|
- test/rbbt/statistics/test_fdr.rb
|
48
80
|
- test/rbbt/statistics/test_hypergeometric.rb
|
81
|
+
- test/rbbt/vector/test_model.rb
|
82
|
+
- test/rbbt/vector/model/test_svm.rb
|
83
|
+
- test/rbbt/network/test_paths.rb
|
49
84
|
- test/test_helper.rb
|
50
85
|
has_rdoc: true
|
51
86
|
homepage: http://github.com/mikisvaz/rbbt-phgx
|
@@ -77,11 +112,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
112
|
requirements: []
|
78
113
|
|
79
114
|
rubyforge_project:
|
80
|
-
rubygems_version: 1.
|
115
|
+
rubygems_version: 1.6.2
|
81
116
|
signing_key:
|
82
117
|
specification_version: 3
|
83
118
|
summary: Data-mining and statistics
|
84
119
|
test_files:
|
85
120
|
- test/rbbt/statistics/test_fdr.rb
|
86
121
|
- test/rbbt/statistics/test_hypergeometric.rb
|
122
|
+
- test/rbbt/vector/test_model.rb
|
123
|
+
- test/rbbt/vector/model/test_svm.rb
|
124
|
+
- test/rbbt/network/test_paths.rb
|
87
125
|
- test/test_helper.rb
|