rbbt-dm 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt/network/paths.rb +136 -0
- data/lib/rbbt/statistics/hypergeometric.rb +47 -25
- data/lib/rbbt/statistics/random_walk.rb +158 -0
- data/lib/rbbt/vector/model/svm.rb +20 -0
- data/lib/rbbt/vector/model.rb +122 -0
- data/test/rbbt/network/test_paths.rb +68 -0
- data/test/rbbt/statistics/test_hypergeometric.rb +3 -4
- data/test/rbbt/vector/model/test_svm.rb +43 -0
- data/test/rbbt/vector/test_model.rb +74 -0
- metadata +44 -6
@@ -0,0 +1,136 @@
|
|
1
|
+
require 'priority_queue'
|
2
|
+
module Paths

  # Shortest path on an unweighted graph (every edge costs 1).
  #
  # adjacency  - Hash-like object mapping a node to the list of its neighbours.
  # start_node - Node the search starts from.
  # end_node   - nil, a single String node, or a collection of candidate
  #              target nodes (anything answering +include?+ and +&+).
  #
  # Returns nil when +start_node+ is not a key of +adjacency+ or no target
  # was reached. With a target it returns the path as an Array running from
  # the target back to +start_node+; without one it returns the Hash of
  # node => parent links built by the search.
  def self.dijkstra(adjacency, start_node, end_node = nil)
    return nil unless adjacency.include? start_node

    parents, distances = shortest_path_tree(adjacency, start_node, end_node) do |neighbours|
      neighbours.collect { |v| [v, 1] }
    end

    end_node ? trace_path(parents, distances, start_node, end_node) : parents
  end

  # Same contract as +dijkstra+, but each adjacency entry holds parallel
  # fields (neighbours and their distances) that are paired up with
  # Misc.zip_fields.
  def self.weighted_dijkstra(adjacency, start_node, end_node = nil)
    return nil unless adjacency.include? start_node

    parents, distances = shortest_path_tree(adjacency, start_node, end_node) do |neighbours|
      Misc.zip_fields(neighbours)
    end

    end_node ? trace_path(parents, distances, start_node, end_node) : parents
  end

  # Same contract as +weighted_dijkstra+, but every edge distance is
  # multiplied by (l + rand), so repeated calls may yield different paths.
  def self.random_weighted_dijkstra(adjacency, l, start_node, end_node = nil)
    return nil unless adjacency.include? start_node

    parents, distances = shortest_path_tree(adjacency, start_node, end_node) do |neighbours|
      Misc.zip_fields(neighbours).collect { |v, node_dist| [v, node_dist * (l + rand)] }
    end

    end_node ? trace_path(parents, distances, start_node, end_node) : parents
  end

  # Shared search loop. The block receives the adjacency entry of the node
  # being expanded and must return [neighbour, edge_distance] pairs.
  #
  # Returns [parents, distances].
  def self.shortest_path_tree(adjacency, start_node, end_node)
    active    = PriorityQueue.new
    distances = Hash.new { 1.0 / 0.0 }
    parents   = Hash.new
    best      = 1.0 / 0.0 # distance of the closest target relaxed so far

    active[start_node] = 0
    until active.empty?
      u, distance = active.delete_min
      distances[u] = distance

      # Nodes that only appear as neighbours have no entry of their own.
      neighbours = adjacency.include?(u) ? adjacency[u] : nil
      next if neighbours.nil? or neighbours.empty?

      yield(neighbours).each do |v, step|
        d = distance + step
        next unless d < distances[v] and d < best # we can't relax this one
        active[v] = distances[v] = d
        parents[v] = u
        best = d if target_node?(end_node, v)
      end
    end

    [parents, distances]
  end

  # True when +v+ is the requested target (String) or one of the requested
  # targets (collection). Always false when no target was requested.
  def self.target_node?(end_node, v)
    return false if end_node.nil?
    String === end_node ? end_node == v : end_node.include?(v)
  end

  # Walks the parent links from the target back to +start_node+. When several
  # targets were reached, the one with the smallest recorded distance wins.
  def self.trace_path(parents, distances, start_node, end_node)
    unless String === end_node
      end_node = (end_node & parents.keys).min_by { |node| distances[node] }
    end
    return nil unless parents.include? end_node

    path = [end_node]
    path << parents[path.last] until path.last === start_node
    path
  end

  private_class_method :shortest_path_tree, :target_node?, :trace_path
end
|
109
|
+
|
110
|
+
module Entity
  module Adjacent
    # Path from this entity (or from each entity, when self is an Array) to
    # +entities+ over +adjacency+. Flat adjacencies use the unweighted
    # search, anything else the weighted one.
    def path_to(adjacency, entities)
      return collect { |gene| gene.path_to(adjacency, entities) } if Array === self

      finder = adjacency.type == :flat ? :dijkstra : :weighted_dijkstra
      Paths.send(finder, adjacency, self, entities)
    end

    # Runs the randomised weighted search +times+ times per entity and
    # returns all resulting paths in a single Array.
    def random_paths_to(adjacency, l, times, entities)
      if Array === self
        all_paths = []
        each do |gene|
          all_paths += gene.random_paths_to(adjacency, l, times, entities)
        end
        all_paths
      else
        (1..times).collect do
          Paths.random_weighted_dijkstra(adjacency, l, self, entities)
        end
      end
    end
  end
end
|
@@ -1,5 +1,8 @@
|
|
1
1
|
require 'inline'
|
2
|
-
require 'rbbt/
|
2
|
+
require 'rbbt/tsv'
|
3
|
+
require 'rbbt/persist'
|
4
|
+
require 'rbbt/statistics/fdr'
|
5
|
+
require 'rbbt/entity'
|
3
6
|
|
4
7
|
module Hypergeometric
|
5
8
|
class << self
|
@@ -88,49 +91,47 @@ double hypergeometric(double total, double support, double list, double found)
|
|
88
91
|
end
|
89
92
|
end
|
90
93
|
|
91
|
-
|
94
|
+
module TSV
|
92
95
|
|
93
|
-
def annotation_counts(fields = nil)
|
96
|
+
def annotation_counts(fields = nil, persistence = false)
|
94
97
|
fields ||= self.fields
|
95
98
|
fields = [fields] if String === fields or Symbol === fields
|
96
99
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
TCHash.get(annotation_count_cache_file)
|
102
|
-
else
|
103
|
-
Log.low "Saving annotation counts to #{ annotation_count_cache_file }"
|
104
|
-
hash = TCHash.get(annotation_count_cache_file)
|
105
|
-
|
106
|
-
counts = Hash.new(0)
|
107
|
-
through :main, fields do |key, values|
|
108
|
-
values.flatten.compact.uniq.each{|value| counts[value] += 1}
|
100
|
+
Persist.persist(filename, :marshal, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
|
101
|
+
data ||= Hash.new(0)
|
102
|
+
through :key, fields do |key, values|
|
103
|
+
values.flatten.compact.uniq.each{|value| data[value] += 1}
|
109
104
|
end
|
110
|
-
|
105
|
+
|
106
|
+
data
|
111
107
|
end
|
112
108
|
end
|
113
109
|
|
114
|
-
def enrichment(list, fields, options = {})
|
115
|
-
|
110
|
+
def enrichment(list, fields = nil, options = {})
|
111
|
+
fields ||= self.fields.first
|
112
|
+
options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false
|
116
113
|
Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
|
117
|
-
|
118
|
-
|
114
|
+
|
115
|
+
selected = select :key => list
|
116
|
+
|
119
117
|
tsv_size = keys.length
|
120
118
|
total = selected.keys.length
|
121
119
|
Log.debug "Found #{total} of #{list.length} entities"
|
122
120
|
|
123
|
-
counts = annotation_counts fields
|
121
|
+
counts = annotation_counts fields, options[:persist]
|
124
122
|
|
125
123
|
annotations = Hash.new 0
|
126
|
-
|
127
|
-
|
124
|
+
with_unnamed do
|
125
|
+
selected.through :key, fields do |key, values|
|
126
|
+
values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
|
127
|
+
annotations[value] += 1
|
128
|
+
}
|
129
|
+
end
|
128
130
|
end
|
129
131
|
|
130
132
|
pvalues = {}
|
131
133
|
annotations.each do |annotation, count|
|
132
|
-
|
133
|
-
next if count < options[:min_support]
|
134
|
+
next if count < options[:min_support] or not counts.include? annotation
|
134
135
|
pvalue = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
|
135
136
|
pvalues[annotation] = pvalue
|
136
137
|
end
|
@@ -138,9 +139,30 @@ class TSV
|
|
138
139
|
FDR.adjust_hash! pvalues if options[:fdr]
|
139
140
|
pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
|
140
141
|
|
142
|
+
TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
|
143
|
+
|
141
144
|
pvalues
|
142
145
|
end
|
146
|
+
|
147
|
+
# Translates the keys of this TSV into the key space of +tsv+ and runs the
# enrichment analysis of +field+ on +tsv+ with those translated keys.
#
# Raises a RuntimeError when no identifier traversal between the two files
# can be found.
def enrichment_for(tsv, field, options = {} )
  tsv = tsv.tsv if Path === tsv

  index = TSV.find_traversal(self, tsv, :in_namespace => false, :persist_input => true)
  raise "Cannot traverse identifiers" if index.nil?

  translated = index.values_at(*self.keys)
  source_keys = translated.flatten.compact.uniq

  tsv.enrichment(source_keys, field, options)
end
|
143
157
|
end
|
144
158
|
|
159
|
+
module Entity
  module Enriched
    # Enrichment analysis of this entity list against +file+; a Path is
    # first opened through its +tsv+ method.
    def enrichment(file, fields = nil, options = {})
      source = Path === file ? file.tsv : file
      source.enrichment(self, fields, options)
    end
  end
end
|
145
167
|
|
146
168
|
|
@@ -0,0 +1,158 @@
|
|
1
|
+
require 'png'
|
2
|
+
require 'inline'
|
3
|
+
|
4
|
+
# Rank-based scoring of a set of positions inside an ordered list, with
# permutation-based p-values and a PNG rendering of the hit positions.
module RandomWalk

  class << self
    inline do |builder|

      # Weight of a position as a function of its relative distance to the
      # middle of the list.
      builder.c_raw <<-'EOC'
double weight(int position, int mean){
  double rel_pos = (double) abs(position - mean) / mean;
  double weight =  0.3 * 0.5 * rel_pos +  0.7 * (exp(30*rel_pos)/exp(30));
  return(weight);
}
      EOC

      # Scores an array of positions against a list of `total` elements;
      # `missing` adds a penalty term to the total weight.
      builder.c <<-'EOC'
double fast_score_scale(VALUE positions, int total, int missing){
  int idx;

  int mean = total / 2;

  VALUE rel_q = rb_ary_new();
  VALUE rel_l = rb_ary_new();

  rb_ary_push(rel_q,rb_float_new(0));

  // Rescale positions and accumulate weights
  double total_weights = 0;
  for (idx = 0; idx < RARRAY(positions)->len; idx++){
    int position = FIX2INT(rb_ary_entry(positions, idx));

    rb_ary_push(rel_l, rb_float_new((double) position / total));

    total_weights += weight(position, mean);
    rb_ary_push(rel_q, rb_float_new(total_weights));
  }

  // Add penalty for missing genes
  double penalty = missing * weight(mean * 0.8, mean);
  total_weights  = total_weights + penalty;

  // Traverse list and get extreme values
  double max_top, max_bottom;
  max_top = max_bottom = 0;
  for (idx = 0; idx < RARRAY(positions)->len; idx++){
    double top    = RFLOAT(rb_ary_entry(rel_q, idx + 1))->value / total_weights -
                    RFLOAT(rb_ary_entry(rel_l, idx))->value;
    double bottom = - (penalty + RFLOAT(rb_ary_entry(rel_q, idx))->value) / total_weights +
                    RFLOAT(rb_ary_entry(rel_l, idx))->value;

    if (top > max_top)       max_top    = top;
    if (bottom > max_bottom) max_bottom = bottom;
  }

  if (max_top > max_bottom) return max_top;
  else                      return -max_bottom;
}
      EOC

    end
  end

  class << self
    alias score fast_score_scale
  end

  # Combines the scores of an "up" and a "down" list into a single value:
  # +down+ when +up+ is zero, +up+ when +down+ is zero, +up - down+ otherwise.
  #
  # NOTE(review): the previous version had an unreachable branch after the
  # unconditional return that would have produced 0 when both scores share
  # the same sign. It never ran, so it was removed; confirm whether that
  # rule was meant to be active.
  def self.combine(up, down)
    return down if up == 0
    return up if down == 0

    up - down
  end

  # Scores the +up+ and +down+ position lists separately and combines them.
  def self.score_up_down(up, down, total, missing = 0)
    scores_up   = score(up, total, missing)
    scores_down = score(down, total, missing)

    combine(scores_up, scores_down)
  end

  # Two sided
  #
  # Absolute scores of +times+ random position lists of length +size+ drawn
  # from 0...total. A +size+ of 0 yields +times+ zeros.
  def self.permutations(size, total, missing = 0, times = 10000)
    if size == 0
      [0] * times
    else
      (1..times).collect do
        score(Array.new(size){ (rand * total).to_i }.sort, total, missing).abs
      end
    end
  end

  # Like +permutations+, for paired random up and down lists.
  def self.permutations_up_down(size_up, size_down, total, missing = 0, times = 10000)
    (1..times).collect do
      random_up   = Array.new(size_up){ (rand * total).to_i }.sort
      random_down = Array.new(size_down){ (rand * total).to_i }.sort
      score_up_down(random_up, random_down, total, missing).abs
    end
  end

  # Fraction of +permutations+ strictly greater than the absolute +score+.
  def self.pvalue(permutations, score)
    score = score.abs
    permutations.select { |per| per > score }.length.to_f / permutations.length
  end

  COLORS = {
    :red   => PNG::Color::Red,
    :green => PNG::Color::Green,
    :white => PNG::Color::White,
    :black => PNG::Color::Black,
  }

  # Draws one black vertical line per hit on a canvas.
  #
  # hits     - 1-based positions of the hits.
  # total    - length of the list the positions refer to.
  # filename - when given the PNG is saved there, otherwise the blob is
  #            returned.
  # options  - :size (canvas length, capped at +total+), :bg_color,
  #            :width (canvas height, default 20) and :sections, pairs of
  #            color => [start, finish] painted before the hits.
  def self.draw_hits(hits, total, filename = nil, options = {})

    size     = options[:size] || total
    bg_color = options[:bg_color] || :white
    width    = options[:width] || 20
    sections = options[:sections] || []

    size = [size, total].min

    # Hits are 1-based, canvas coordinates are 0-based.
    hits = hits.collect{|h| h - 1}
    if size < total
      hits = hits.collect{|h| (h.to_f * size / total).to_i}
    end

    canvas = PNG::Canvas.new size, width, COLORS[bg_color]

    sections.each{|color, info|
      start  = info[0]
      finish = info[1]
      (start..finish).each{|x|
        (0..width - 1).each{|y|
          canvas[x,y] = COLORS[color]
        }
      }
    }

    hits.each{|hit|
      canvas.line hit, 0, hit , width - 1, PNG::Color::Black
    }

    png = PNG.new canvas

    if filename
      png.save filename
    else
      png.to_blob
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'rbbt/vector/model'
|
2
|
+
# VectorModel preconfigured with R scripts that train and evaluate an SVM
# through the e1071 package. Elements are used as their own feature vector.
class SVMModel < VectorModel
  def initialize(dir)
    super(dir)

    # Identity extractor: the element already is the feature vector.
    @extract_features = Proc.new do |element|
      element
    end

    @train_model = <<-EOF
library(e1071);
model = svm(class ~ ., data = features);
    EOF

    @eval_model = <<-EOF
library(e1071);
label = predict(model, features);
    EOF
  end
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
require 'rbbt/util/R'
|
2
|
+
|
3
|
+
# Generic feature-vector model. Elements are turned into feature vectors by
# +extract_features+; training and evaluation are delegated either to Procs
# or to snippets of R code run through R.run.
class VectorModel
  attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
  attr_accessor :features, :labels

  # Writes +features+ and +labels+ to temporary files, loads them in R, runs
  # +code+ (which must define a variable called +model+) and saves that
  # model into +model_file+.
  def self.R_train(model_file, features, labels, code)
    TmpFile.with_file do |feature_file|
      Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
      Open.write(feature_file + '.class', labels * "\n")

      R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
labels = scan("#{ feature_file }.class");
features = cbind(features, class = labels);
#{code}
save(model, file='#{model_file}')
      EOF
    end
  end

  # Loads the model saved in +model_file+ and runs +code+ (which must define
  # a variable called +label+) over +features+.
  #
  # list - when true +features+ is an Array of feature vectors and an Array
  #        of Floats is returned; otherwise +features+ is a single vector
  #        and a single Float is returned.
  def self.R_eval(model_file, features, list, code)
    TmpFile.with_file do |feature_file|
      TmpFile.with_file do |results|
        if list
          Open.write(feature_file, features.collect{|feat| feat * "\t"} * "\n" + "\n")
        else
          Open.write(feature_file, features * "\t" + "\n")
        end

        io = R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
load(file="#{model_file}");
#{code}
cat(paste(label, sep="\\n"));
        EOF

        # Drop a leading warning line before parsing the numbers. The regex
        # used to carry an /s flag, which in Ruby selects an encoding rather
        # than dot-all matching, so it was removed.
        res = io.read.sub(/WARNING: .*?\n/,'').split(/\s+/).collect{|l| l.to_f}

        list ? res : res.first
      end
    end
  end

  # directory        - where the model is stored; created when missing.
  # extract_features - Proc turning an element into a feature vector.
  # train_model      - Proc (model_file, features, labels) or R code String.
  # eval_model       - Proc (model_file, features, list) or R code String.
  def initialize(directory, extract_features = nil, train_model = nil, eval_model = nil)
    @directory = directory
    FileUtils.mkdir_p @directory unless File.exist? @directory
    @model_file = File.join(@directory, "model")
    @extract_features = extract_features
    @train_model = train_model
    @eval_model = eval_model
    @features = []
    @labels = []
  end

  # Adds the features of +element+, and its +label+ when given, to the
  # training data.
  def add(element, label = nil)
    @features << extract_features.call(element)
    @labels << label unless label.nil?
  end

  # Trains the model on the accumulated features and labels. Does nothing
  # when +train_model+ is neither a Proc nor a String.
  def train
    case train_model
    when Proc
      train_model.call(@model_file, @features, @labels)
    when String
      # self.class keeps subclasses working without naming any of them here.
      self.class.R_train(@model_file, @features, @labels, train_model)
    end
  end

  # Predicts the label of a single element.
  def eval(element)
    case eval_model
    when Proc
      eval_model.call(@model_file, extract_features.call(element), false)
    when String
      self.class.R_eval(@model_file, extract_features.call(element), false, eval_model)
    end
  end

  # Predicts the labels of several elements. Pass +extract+ as false when
  # +elements+ already are feature vectors.
  def eval_list(elements, extract = true)
    vectors = extract ? elements.collect{|element| extract_features.call(element)} : elements

    case eval_model
    when Proc
      eval_model.call(@model_file, vectors, true)
    when String
      self.class.R_eval(@model_file, vectors, true, eval_model)
    end
  end

  # Repeated random hold-out validation: +folds+ times the data is shuffled,
  # a chunk of length/folds elements (at least one) is held out, the model
  # is trained on the rest and evaluated on the held-out chunk.
  #
  # Returns an Array with one accuracy (Float between 0 and 1) per round; a
  # prediction counts as correct when it is within 0.5 of the label. Returns
  # an empty Array when there are no features. The original features and
  # labels are restored afterwards, even if training or evaluation raises.
  def cross_validation(folds = 10)
    saved_features = @features
    saved_labels = @labels
    return [] if saved_features.empty?

    seq = (0..saved_features.length - 1).to_a
    chunk_size = [saved_features.length / folds, 1].max

    acc = []
    begin
      folds.times do
        seq = seq.shuffle
        # Exclusive range, so the two chunks never share an element.
        eval_chunk = seq[0...chunk_size]
        train_chunk = seq[chunk_size..-1]

        eval_features = saved_features.values_at(*eval_chunk)
        eval_labels = saved_labels.values_at(*eval_chunk)

        @features = saved_features.values_at(*train_chunk)
        @labels = saved_labels.values_at(*train_chunk)

        train
        predictions = eval_list eval_features, false

        # Labels may have been added as Strings, hence the to_f.
        hits = predictions.zip(eval_labels).select{|pred,lab| (pred.to_f - lab.to_f).abs < 0.5 }.length
        acc << hits.to_f / eval_chunk.length
      end
    ensure
      @features = saved_features
      @labels = saved_labels
    end

    acc
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'rbbt/network/paths'
|
3
|
+
require 'test/unit'
|
4
|
+
require 'rbbt/sources/string'
|
5
|
+
require 'set'
|
6
|
+
|
7
|
+
|
8
|
+
# Exercises the three Paths search variants on the STRING protein-protein
# interaction network.
class TestNetwork < Test::Unit::TestCase
  def test_dijsktra
    network = STRING.protein_protein.tsv :persist => false, :fields => ["Interactor Ensembl Protein ID"], :type => :flat
    network.unnamed = true

    source = "ENSP00000256078"
    target = "ENSP00000306245"

    path = Paths.dijkstra(network, source, [target])

    assert path != nil
    assert path.include? source
    assert path.include? target
  end

  def test_weighted_dijsktra
    network = STRING.protein_protein.tsv

    # Turn similarity scores into distances.
    network.process "Score" do |scores|
      scores.collect{|score| 1000 - score.to_i}
    end
    network.unnamed = true

    source = "ENSP00000256078"
    target = "ENSP00000306245"

    # The target can be given as a single node or as a collection of nodes.
    [target, Set.new([target])].each do |targets|
      path = Paths.weighted_dijkstra(network, source, targets)

      assert path != nil
      assert path.include? source
      assert path.include? target
    end
  end

  def test_random_weighted_dijsktra
    network = STRING.protein_protein.tsv

    network.process "Score" do |scores|
      scores.collect{|score| 1000 - score.to_i}
    end
    network.unnamed = true

    source = "ENSP00000256078"
    target = "ENSP00000306245"

    path = Paths.random_weighted_dijkstra(network, 0.8, source, target)

    assert path != nil
    assert path.include? source
    assert path.include? target
  end

end
|
67
|
+
|
68
|
+
|
@@ -17,7 +17,7 @@ row3 a C Id4
|
|
17
17
|
EOF
|
18
18
|
|
19
19
|
TmpFile.with_file(content) do |filename|
|
20
|
-
tsv = TSV.
|
20
|
+
tsv = TSV.open(filename, :sep => /\s+/)
|
21
21
|
counts = tsv.annotation_counts
|
22
22
|
assert_equal 2, counts['a']
|
23
23
|
end
|
@@ -36,10 +36,9 @@ row7 A B Id3
|
|
36
36
|
EOF
|
37
37
|
|
38
38
|
TmpFile.with_file(content) do |filename|
|
39
|
-
tsv = TSV.
|
39
|
+
tsv = TSV.open(filename, :sep => /\s+/)
|
40
40
|
|
41
|
-
assert_equal %w(a), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA").collect{|annot,pvalue| pvalue < 0.05 ? annot : nil}.compact
|
42
|
-
assert_equal %w(aa aaa), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA").collect{|annot,pvalue| pvalue > 0.05 ? annot : nil}.compact
|
41
|
+
assert_equal %w(a), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA", :fdr => false).collect{|annot,pvalue| pvalue < 0.05 ? annot : nil}.compact
|
43
42
|
end
|
44
43
|
end
|
45
44
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../../test_helper')
|
2
|
+
require 'rbbt/vector/model/svm'
|
3
|
+
require 'rbbt/util/R'
|
4
|
+
require 'test/unit'
|
5
|
+
|
6
|
+
# Trains an SVMModel on a tiny labelled data set and checks its predictions.
class TestSVMModel < Test::Unit::TestCase

  def test_model
    # One example per line: label, then semicolon-separated features.
    text =<<-EOF
1 0;1;1
1 1;0;1
1 1;1;1
1 0;1;1
1 1;1;1
0 0;1;0
0 1;0;0
0 0;1;0
0 1;0;0
    EOF

    TmpFile.with_file() do |dir|
      FileUtils.mkdir_p dir
      model = SVMModel.new(dir)

      model.extract_features = Proc.new do |element|
        element.split(";")
      end

      text.split(/\n/).each do |line|
        label, features = line.split(" ")
        model.add(features, label)
      end

      model.train

      positive = model.eval("1;1;1")
      negative = model.eval("0;0;0")
      assert positive > 0.5
      assert negative < 0.5

      calls = model.eval_list(%w(1;1;1 0;0;0)).collect{|v| v > 0.5}
      assert_equal [true, false], calls
    end
  end

end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
|
2
|
+
require 'rbbt/vector/model'
|
3
|
+
require 'rbbt/util/R'
|
4
|
+
require 'test/unit'
|
5
|
+
|
6
|
+
# Drives a VectorModel with hand-written training and evaluation Procs that
# call an e1071 SVM through R.
class TestVectorModel < Test::Unit::TestCase

  def test_model
    # One example per line: label, then semicolon-separated features.
    text =<<-EOF
1 0;1;1
1 1;0;1
1 1;1;1
1 0;1;1
1 1;1;1
0 0;1;0
0 1;0;0
0 0;1;0
0 1;0;0
    EOF

    TmpFile.with_file() do |dir|
      FileUtils.mkdir_p dir
      model = VectorModel.new(dir)

      model.extract_features = Proc.new do |element|
        element.split(";")
      end

      model.train_model = Proc.new do |model_file,features,labels|
        TmpFile.with_file do |feature_file|
          Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
          Open.write(feature_file + '.class', labels * "\n")
          R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
labels = scan("#{ feature_file }.class", what=numeric());
features = cbind(features, class = labels);
library(e1071)
model = svm(class ~ ., data = features)
save(model, file="#{ model_file }");
          EOF
        end
      end

      model.eval_model = Proc.new do |model_file,features|
        TmpFile.with_file do |feature_file|
          TmpFile.with_file do |results|
            Open.write(feature_file, features * "\t")
            script = <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
library(e1071)
load(file="#{ model_file }")
label = predict(model, features);
cat(label, file="#{results}");
            EOF
            puts R.run(script).read
            # The prediction is read back from the results file.
            Open.read(results)
          end
        end
      end

      text.split(/\n/).each do |line|
        label, features = line.split(" ")
        model.add(features, label)
      end

      model.train

      assert model.eval("1;1;1").to_f > 0.5
      assert model.eval("0;0;0").to_f < 0.5
    end
  end

end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2011-10-04 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -32,6 +32,34 @@ dependencies:
|
|
32
32
|
version: "0"
|
33
33
|
type: :runtime
|
34
34
|
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: rbbt-entities
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :runtime
|
48
|
+
version_requirements: *id002
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: RubyInline
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
type: :runtime
|
62
|
+
version_requirements: *id003
|
35
63
|
description: Data-mining and statistics
|
36
64
|
email: miguel.vazquez@fdi.ucm.es
|
37
65
|
executables: []
|
@@ -42,10 +70,17 @@ extra_rdoc_files:
|
|
42
70
|
- LICENSE
|
43
71
|
files:
|
44
72
|
- LICENSE
|
73
|
+
- lib/rbbt/network/paths.rb
|
45
74
|
- lib/rbbt/statistics/fdr.rb
|
46
75
|
- lib/rbbt/statistics/hypergeometric.rb
|
76
|
+
- lib/rbbt/statistics/random_walk.rb
|
77
|
+
- lib/rbbt/vector/model.rb
|
78
|
+
- lib/rbbt/vector/model/svm.rb
|
47
79
|
- test/rbbt/statistics/test_fdr.rb
|
48
80
|
- test/rbbt/statistics/test_hypergeometric.rb
|
81
|
+
- test/rbbt/vector/test_model.rb
|
82
|
+
- test/rbbt/vector/model/test_svm.rb
|
83
|
+
- test/rbbt/network/test_paths.rb
|
49
84
|
- test/test_helper.rb
|
50
85
|
has_rdoc: true
|
51
86
|
homepage: http://github.com/mikisvaz/rbbt-phgx
|
@@ -77,11 +112,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
112
|
requirements: []
|
78
113
|
|
79
114
|
rubyforge_project:
|
80
|
-
rubygems_version: 1.
|
115
|
+
rubygems_version: 1.6.2
|
81
116
|
signing_key:
|
82
117
|
specification_version: 3
|
83
118
|
summary: Data-mining and statistics
|
84
119
|
test_files:
|
85
120
|
- test/rbbt/statistics/test_fdr.rb
|
86
121
|
- test/rbbt/statistics/test_hypergeometric.rb
|
122
|
+
- test/rbbt/vector/test_model.rb
|
123
|
+
- test/rbbt/vector/model/test_svm.rb
|
124
|
+
- test/rbbt/network/test_paths.rb
|
87
125
|
- test/test_helper.rb
|