rbbt-dm 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,136 @@
1
+ require 'priority_queue'
2
# Shortest-path searches over adjacency maps.
#
# All three public methods share the same search loop (see +search+) and only
# differ in how the cost of an edge is computed. They rely on the
# PriorityQueue class (`active[node] = priority`, `delete_min`, `empty?`);
# the weighted variants additionally rely on Misc.zip_fields.
module Paths

  # Shortest path where every edge costs 1.
  #
  # adjacency  - maps a node to the list of its neighbours; must answer
  #              +include?+ and +[]+.
  # start_node - node the search starts from.
  # end_node   - a single target (String), a collection of candidate targets
  #              (e.g. Array or Set), or nil.
  #
  # Returns nil when start_node is not in adjacency or when no target was
  # reached. With an end_node it returns the path ordered from the target back
  # to start_node; without one it returns the Hash of node => parent.
  def self.dijkstra(adjacency, start_node, end_node = nil)
    search(adjacency, start_node, end_node) do |u, distance|
      adjacency[u].collect { |v| [v, distance + 1] }
    end
  end

  # Shortest path where adjacency[node] holds parallel fields that
  # Misc.zip_fields turns into (neighbour, edge cost) pairs.
  #
  # Arguments and return value are the same as for +dijkstra+.
  def self.weighted_dijkstra(adjacency, start_node, end_node = nil)
    search(adjacency, start_node, end_node) do |u, distance|
      Misc.zip_fields(adjacency[u]).collect { |v, node_dist| [v, distance + node_dist] }
    end
  end

  # Like +weighted_dijkstra+, but every edge cost is multiplied by
  # (l + rand), so repeated calls may produce different paths.
  #
  # l - constant added to the random factor applied to each edge cost.
  def self.random_weighted_dijkstra(adjacency, l, start_node, end_node = nil)
    search(adjacency, start_node, end_node) do |u, distance|
      Misc.zip_fields(adjacency[u]).collect { |v, node_dist| [v, distance + (node_dist * (l + rand))] }
    end
  end

  # Search loop shared by the public methods. The block receives the node
  # being expanded and its distance and must return [neighbour, new_distance]
  # pairs for its outgoing edges.
  def self.search(adjacency, start_node, end_node)
    return nil unless adjacency.include? start_node

    infinity  = 1.0 / 0.0
    active    = PriorityQueue.new
    distances = Hash.new { infinity }
    parents   = {}

    active[start_node] = 0
    best = infinity # distance of the closest target seen so far
    until active.empty?
      u, distance = active.delete_min
      distances[u] = distance

      # Nodes that only appear as neighbours have nothing to expand
      next if not adjacency.include?(u) or adjacency[u].nil? or adjacency[u].empty?

      yield(u, distance).each do |v, d|
        next unless d < distances[v] and d < best # we can't relax this one
        active[v] = distances[v] = d
        parents[v] = u
        best = d if target?(end_node, v)
      end
    end

    return parents unless end_node

    unless String === end_node
      # Several candidates may have been reached; keep the closest one
      end_node = end_node.select { |node| parents.include? node }.min_by { |node| distances[node] }
    end
    return nil unless parents.include? end_node

    path = [end_node]
    path << parents[path.last] until path.last === start_node
    path
  end

  # True when node is the requested target or one of the requested targets.
  # A nil end_node means there is no target at all.
  def self.target?(end_node, node)
    return false unless end_node
    String === end_node ? end_node == node : end_node.include?(node)
  end

  private_class_method :search, :target?
end
109
+
110
module Entity
  # Path-finding helpers for entities, or Arrays of entities, that are nodes
  # of an adjacency map.
  module Adjacent

    # Finds a path from this entity to +entities+ through +adjacency+, using
    # Paths.dijkstra when adjacency.type is :flat and Paths.weighted_dijkstra
    # otherwise. An Array receiver yields one result per element.
    def path_to(adjacency, entities)
      return collect { |gene| gene.path_to(adjacency, entities) } if Array === self

      finder = adjacency.type == :flat ? :dijkstra : :weighted_dijkstra
      Paths.send(finder, adjacency, self, entities)
    end

    # Runs Paths.random_weighted_dijkstra +times+ times from this entity and
    # returns the list of results. An Array receiver returns the concatenated
    # results of all its elements.
    def random_paths_to(adjacency, l, times, entities)
      if Array === self
        all_paths = []
        each do |gene|
          all_paths += gene.random_paths_to(adjacency, l, times, entities)
        end
        return all_paths
      end

      (1..times).collect do
        Paths.random_weighted_dijkstra(adjacency, l, self, entities)
      end
    end
  end
end
@@ -1,5 +1,8 @@
1
1
  require 'inline'
2
- require 'rbbt/util/tsv'
2
+ require 'rbbt/tsv'
3
+ require 'rbbt/persist'
4
+ require 'rbbt/statistics/fdr'
5
+ require 'rbbt/entity'
3
6
 
4
7
  module Hypergeometric
5
8
  class << self
@@ -88,49 +91,47 @@ double hypergeometric(double total, double support, double list, double found)
88
91
  end
89
92
  end
90
93
 
91
- class TSV
94
+ module TSV
92
95
 
93
- def annotation_counts(fields = nil)
96
+ def annotation_counts(fields = nil, persistence = false)
94
97
  fields ||= self.fields
95
98
  fields = [fields] if String === fields or Symbol === fields
96
99
 
97
- annotation_count_cache_file = TSV.get_persistence_file(File.basename(filename) + "_" + fields.inspect, File.expand_path(File.dirname(filename)))
98
-
99
- if File.exists?(annotation_count_cache_file)
100
- Log.low "Loading annotation counts from #{ annotation_count_cache_file }"
101
- TCHash.get(annotation_count_cache_file)
102
- else
103
- Log.low "Saving annotation counts to #{ annotation_count_cache_file }"
104
- hash = TCHash.get(annotation_count_cache_file)
105
-
106
- counts = Hash.new(0)
107
- through :main, fields do |key, values|
108
- values.flatten.compact.uniq.each{|value| counts[value] += 1}
100
+ Persist.persist(filename, :marshal, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
101
+ data ||= Hash.new(0)
102
+ through :key, fields do |key, values|
103
+ values.flatten.compact.uniq.each{|value| data[value] += 1}
109
104
  end
110
- hash.merge! counts
105
+
106
+ data
111
107
  end
112
108
  end
113
109
 
114
- def enrichment(list, fields, options = {})
115
- options = Misc.add_defaults options, :min_support => 3
110
+ def enrichment(list, fields = nil, options = {})
111
+ fields ||= self.fields.first
112
+ options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false
116
113
  Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
117
- selected = select :main => list
118
-
114
+
115
+ selected = select :key => list
116
+
119
117
  tsv_size = keys.length
120
118
  total = selected.keys.length
121
119
  Log.debug "Found #{total} of #{list.length} entities"
122
120
 
123
- counts = annotation_counts fields
121
+ counts = annotation_counts fields, options[:persist]
124
122
 
125
123
  annotations = Hash.new 0
126
- selected.through :main, fields do |key, values|
127
- values.flatten.compact.uniq.each{|value| annotations[value] += 1}
124
+ with_unnamed do
125
+ selected.through :key, fields do |key, values|
126
+ values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
127
+ annotations[value] += 1
128
+ }
129
+ end
128
130
  end
129
131
 
130
132
  pvalues = {}
131
133
  annotations.each do |annotation, count|
132
- Log.debug "Hypergeometric: #{ annotation } - #{[tsv_size, counts[annotation], total, count].inspect}"
133
- next if count < options[:min_support]
134
+ next if count < options[:min_support] or not counts.include? annotation
134
135
  pvalue = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
135
136
  pvalues[annotation] = pvalue
136
137
  end
@@ -138,9 +139,30 @@ class TSV
138
139
  FDR.adjust_hash! pvalues if options[:fdr]
139
140
  pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
140
141
 
142
+ TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
143
+
141
144
  pvalues
142
145
  end
146
+
147
# Runs an enrichment analysis on another table using this table's keys.
#
# tsv     - the table to analyse, or a Path that is opened with +tsv+.
# field   - field passed on to the other table's +enrichment+.
# options - options passed on to the other table's +enrichment+.
#
# The keys are translated through the index returned by TSV.find_traversal;
# raises a RuntimeError when no such index is found.
def enrichment_for(tsv, field, options = {})
  tsv = tsv.tsv if Path === tsv

  traversal = TSV.find_traversal(self, tsv, :in_namespace => false, :persist_input => true)
  raise "Cannot traverse identifiers" if traversal.nil?

  translated = traversal.values_at(*self.keys)
  tsv.enrichment(translated.flatten.compact.uniq, field, options)
end
143
157
  end
144
158
 
159
module Entity
  # Lets a list of entities ask a table for its enrichment results.
  module Enriched

    # Delegates to the +enrichment+ method of +file+, passing the receiver as
    # the list. A Path is first opened with +tsv+.
    def enrichment(file, fields = nil, options = {})
      table = Path === file ? file.tsv : file
      table.enrichment(self, fields, options)
    end
  end
end
145
167
 
146
168
 
@@ -0,0 +1,158 @@
1
+ require 'png'
2
+ require 'inline'
3
+
4
# Rank-based scoring of a set of positions within a ranked list, plus
# permutation-based p-values and a PNG rendering of the hits.
#
# The scoring function is compiled through RubyInline; drawing uses the png
# library.
module RandomWalk

  class << self
    inline do |builder|

      # Weight of a position as a function of its relative distance to the
      # middle of the list.
      builder.c_raw <<-'EOC'
  double weight(int position, int mean){
    double rel_pos = (double) abs(position - mean) / mean;
    double weight =  0.3 *  0.5 * rel_pos +  0.7 * (exp(30*rel_pos)/exp(30));
    return(weight);
  }
      EOC

      # positions: Array of Fixnum positions (callers in this module pass them
      # sorted); total: length of the ranked list; missing: number of elements
      # without a position, which add a fixed penalty to the total weight.
      #
      # Uses RARRAY_LEN / NUM2DBL rather than direct RARRAY()->len and
      # RFLOAT()->value struct access, which does not compile on Ruby 1.9+.
      builder.c <<-'EOC'
  double fast_score_scale(VALUE positions, int total, int missing){
    long idx;
    long len = RARRAY_LEN(positions);

    int mean = total / 2;

    VALUE rel_q = rb_ary_new();
    VALUE rel_l = rb_ary_new();

    rb_ary_push(rel_q,rb_float_new(0));

    // Rescale positions and accumulate weights
    double total_weights = 0;
    for (idx = 0; idx < len; idx++){
      int position = FIX2INT(rb_ary_entry(positions, idx));

      rb_ary_push(rel_l, rb_float_new((double) position / total));

      total_weights += weight(position, mean);
      rb_ary_push(rel_q, rb_float_new(total_weights));
    }

    // Add penalty for missing genes
    double penalty = missing * weight(mean * 0.8, mean);
    total_weights  = total_weights + penalty;

    // Traverse list and get extreme values
    double max_top, max_bottom;
    max_top = max_bottom = 0;
    for (idx = 0; idx < len; idx++){
      double top    = NUM2DBL(rb_ary_entry(rel_q, idx + 1)) / total_weights -
                      NUM2DBL(rb_ary_entry(rel_l, idx));
      double bottom = - (penalty + NUM2DBL(rb_ary_entry(rel_q, idx))) / total_weights +
                      NUM2DBL(rb_ary_entry(rel_l, idx));

      if (top > max_top)       max_top    = top;
      if (bottom > max_bottom) max_bottom = bottom;
    }

    if (max_top > max_bottom) return max_top;
    else                      return -max_bottom;
  }
      EOC

    end
  end

  class << self
    alias score fast_score_scale
  end

  # Combines the score of an "up" set with the score of a "down" set.
  #
  # Returns the other score when one of them is 0, and up - down otherwise.
  # NOTE(review): the zero cases return +down+ rather than -down, so the sign
  # differs from the general case; callers in this module only use the
  # absolute value. The original also carried unreachable code after the
  # unconditional return, which has been removed.
  def self.combine(up, down)
    return down if up == 0
    return up if down == 0

    up - down
  end

  # Scores the +up+ and +down+ position lists separately and combines them.
  def self.score_up_down(up, down, total, missing = 0)
    scores_up   = score(up, total, missing)
    scores_down = score(down, total, missing)

    combine(scores_up, scores_down)
  end

  # Sorted list of +size+ random positions in 0...total.
  def self.random_positions(size, total)
    Array.new(size) { (rand * total).to_i }.sort
  end
  private_class_method :random_positions

  # Two sided
  #
  # Returns +times+ absolute scores of random position sets of the given
  # size; all zeros when size is 0.
  def self.permutations(size, total, missing = 0, times = 10000)
    return [0] * times if size == 0

    (1..times).collect do
      score(random_positions(size, total), total, missing).abs
    end
  end

  # Same as +permutations+ for combined up/down scores.
  def self.permutations_up_down(size_up, size_down, total, missing = 0, times = 10000)
    (1..times).collect do
      score_up_down(random_positions(size_up, total), random_positions(size_down, total), total, missing).abs
    end
  end

  # Fraction of permutation scores strictly greater than the absolute value
  # of +score+.
  def self.pvalue(permutations, score)
    score = score.abs
    larger = permutations.select { |per| per > score }
    larger.length.to_f / permutations.length
  end

  COLORS = {
    :red   => PNG::Color::Red,
    :green => PNG::Color::Green,
    :white => PNG::Color::White,
    :black => PNG::Color::Black,
  }

  # Draws one vertical black line per hit on a canvas.
  #
  # hits     - positions of the hits; each is shifted down by one before
  #            drawing (presumably 1-based positions - confirm with callers).
  # total    - length of the ranked list.
  # filename - where to save the PNG; when nil the image blob is returned.
  # options  - :size (canvas length, capped at total; hits are rescaled when
  #            smaller), :bg_color (key of COLORS, default :white), :width
  #            (default 20) and :sections (color => [start, finish] ranges
  #            painted before the hits).
  def self.draw_hits(hits, total, filename = nil, options = {})
    size     = options[:size]     || total
    bg_color = options[:bg_color] || :white
    width    = options[:width]    || 20
    sections = options[:sections] || []

    size = [size, total].min

    hits = hits.collect { |h| h - 1 }
    hits = hits.collect { |h| (h.to_f * size / total).to_i } if size < total

    canvas = PNG::Canvas.new size, width, COLORS[bg_color]

    sections.each do |color, info|
      start, finish = info[0], info[1]
      (start..finish).each do |x|
        (0..width - 1).each do |y|
          canvas[x, y] = COLORS[color]
        end
      end
    end

    hits.each do |hit|
      canvas.line hit, 0, hit, width - 1, PNG::Color::Black
    end

    png = PNG.new canvas

    if filename
      png.save filename
    else
      png.to_blob
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'rbbt/vector/model'
2
# VectorModel preconfigured with R scripts that train and evaluate a support
# vector machine through the e1071 R package.
class SVMModel < VectorModel

  # dir - directory handed to VectorModel#initialize.
  #
  # Elements are used as their own feature vector unless extract_features is
  # replaced afterwards.
  def initialize(dir)
    super(dir)

    @extract_features = Proc.new do |element|
      element
    end

    # R code run for training; expects a `features` data frame with a `class` column
    @train_model =<<-EOF
library(e1071);
model = svm(class ~ ., data = features);
    EOF

    # R code run for evaluation; must leave the predictions in `label`
    @eval_model =<<-EOF
library(e1071);
label = predict(model, features);
    EOF
  end
end
@@ -0,0 +1,122 @@
1
+ require 'rbbt/util/R'
2
+
3
# A model trained over feature vectors.
#
# Feature extraction, training and evaluation are pluggable: extract_features
# is a Proc, while train_model and eval_model are either Procs or Strings of
# R code that are run through R_train / R_eval.
class VectorModel
  attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
  attr_accessor :features, :labels

  # Trains a model in R and saves it.
  #
  # model_file - path where R saves the `model` object.
  # features   - list of feature vectors, written one per line, tab separated.
  # labels     - one label per feature vector, attached as the `class` column.
  # code       - R code that must define `model` from the `features` data frame.
  def self.R_train(model_file, features, labels, code)
    TmpFile.with_file do |feature_file|
      Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
      Open.write(feature_file + '.class', labels * "\n")

      R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
labels = scan("#{ feature_file }.class");
features = cbind(features, class = labels);
#{code}
save(model, file='#{model_file}')
      EOF
    end
  end

  # Evaluates feature vectors with a model saved by R_train.
  #
  # model_file - path of the saved model.
  # features   - a list of feature vectors when +list+ is true, otherwise a
  #              single feature vector.
  # list       - whether +features+ holds several vectors.
  # code       - R code that must leave the predictions in `label`.
  #
  # Returns the predictions as Floats: an Array when +list+ is true, a single
  # value otherwise.
  def self.R_eval(model_file, features, list, code)
    TmpFile.with_file do |feature_file|
      if list
        Open.write(feature_file, features.collect{|feat| feat * "\t"} * "\n" + "\n")
      else
        Open.write(feature_file, features * "\t" + "\n")
      end

      io = R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
load(file="#{model_file}");
#{code}
cat(paste(label, sep="\\n"));
      EOF

      # Drop the first warning line, if any, before parsing the numbers. The
      # pattern never needs to cross a newline, so no regexp flag is used
      # (`/s` would be an encoding flag in Ruby, not "dot matches newline").
      res = io.read.sub(/WARNING: .*?\n/, '').split(/\s+/).collect{|l| l.to_f}

      list ? res : res.first
    end
  end

  # directory        - where the model is stored; created when missing.
  # extract_features - Proc turning an element into a feature vector.
  # train_model      - Proc or R code used by +train+.
  # eval_model       - Proc or R code used by +eval+ and +eval_list+.
  def initialize(directory, extract_features = nil, train_model = nil, eval_model = nil)
    @directory = directory
    FileUtils.mkdir_p @directory unless File.exist? @directory
    @model_file = File.join(@directory, "model")

    @extract_features = extract_features
    @train_model      = train_model
    @eval_model       = eval_model

    @features = []
    @labels   = []
  end

  # Adds the features of +element+, and its label when given, to the
  # training data.
  def add(element, label = nil)
    @features << extract_features.call(element)
    @labels << label unless label.nil?
  end

  # Trains the model with the accumulated features and labels. Does nothing
  # when train_model is neither a Proc nor a String.
  def train
    case
    when Proc === train_model
      train_model.call(@model_file, @features, @labels)
    when String === train_model
      # self.class keeps subclass overrides working without naming a subclass
      self.class.R_train(@model_file, @features, @labels, train_model)
    end
  end

  # Evaluates a single element.
  def eval(element)
    case
    when Proc === eval_model
      eval_model.call(@model_file, extract_features.call(element), false)
    when String === eval_model
      self.class.R_eval(@model_file, extract_features.call(element), false, eval_model)
    end
  end

  # Evaluates several elements at once; pass extract = false when +elements+
  # already are feature vectors.
  def eval_list(elements, extract = true)
    vectors = extract ? elements.collect{|element| extract_features.call(element)} : elements

    case
    when Proc === eval_model
      eval_model.call(@model_file, vectors, true)
    when String === eval_model
      self.class.R_eval(@model_file, vectors, true, eval_model)
    end
  end

  # Estimates accuracy by repeated random hold-out: for each of +folds+
  # rounds the data is shuffled, features.length / folds elements are held
  # out, the model is trained on the rest and evaluated on the held-out part.
  #
  # Returns one accuracy per round, as a Float between 0 and 1. A prediction
  # counts as correct when it is within 0.5 of the label; both are converted
  # with to_f.
  #
  # Raises ArgumentError when there are fewer elements than folds. The stored
  # features and labels are restored even when training or evaluation raises.
  def cross_validation(folds = 10)
    saved_features = @features
    saved_labels   = @labels
    seq = (0..saved_features.length - 1).to_a

    chunk_size = saved_features.length / folds
    raise ArgumentError, "Not enough elements (#{saved_features.length}) for #{folds} folds" if chunk_size == 0

    (1..folds).collect do
      seq = seq.shuffle
      eval_chunk  = seq[0...chunk_size]   # exclusive range: no overlap with the training part
      train_chunk = seq[chunk_size..-1]

      eval_features = saved_features.values_at(*eval_chunk)
      eval_labels   = saved_labels.values_at(*eval_chunk)

      begin
        @features = saved_features.values_at(*train_chunk)
        @labels   = saved_labels.values_at(*train_chunk)

        train
        predictions = eval_list eval_features, false
      ensure
        @features = saved_features
        @labels   = saved_labels
      end

      hits = predictions.zip(eval_labels).select{|pred, lab| (pred.to_f - lab.to_f).abs < 0.5}
      hits.length.to_f / chunk_size
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'rbbt/network/paths'
3
+ require 'test/unit'
4
+ require 'rbbt/sources/string'
5
+ require 'set'
6
+
7
+
8
# Exercises the Paths searches on the STRING protein-protein network.
class TestNetwork < Test::Unit::TestCase
  START_NODE = "ENSP00000256078"
  END_NODE   = "ENSP00000306245"

  def test_dijsktra
    string = STRING.protein_protein.tsv :persist => false, :fields => ["Interactor Ensembl Protein ID"], :type => :flat
    string.unnamed = true

    assert_joins Paths.dijkstra(string, START_NODE, [END_NODE])
  end

  def test_weighted_dijsktra
    string = scored_network

    assert_joins Paths.weighted_dijkstra(string, START_NODE, END_NODE)
    assert_joins Paths.weighted_dijkstra(string, START_NODE, Set.new([END_NODE]))
  end

  def test_random_weighted_dijsktra
    string = scored_network

    assert_joins Paths.random_weighted_dijkstra(string, 0.8, START_NODE, END_NODE)
  end

  private

  # STRING network whose "Score" field is turned into 1000 - score
  def scored_network
    network = STRING.protein_protein.tsv

    network.process "Score" do |scores|
      scores.collect{|score| 1000 - score.to_i}
    end
    network.unnamed = true

    network
  end

  # A path must exist and contain both ends
  def assert_joins(path)
    assert path != nil
    assert path.include? START_NODE
    assert path.include? END_NODE
  end
end
67
+
68
+
@@ -17,7 +17,7 @@ row3 a C Id4
17
17
  EOF
18
18
 
19
19
  TmpFile.with_file(content) do |filename|
20
- tsv = TSV.new(filename + '#:sep=/\s+/')
20
+ tsv = TSV.open(filename, :sep => /\s+/)
21
21
  counts = tsv.annotation_counts
22
22
  assert_equal 2, counts['a']
23
23
  end
@@ -36,10 +36,9 @@ row7 A B Id3
36
36
  EOF
37
37
 
38
38
  TmpFile.with_file(content) do |filename|
39
- tsv = TSV.new(filename + '#:sep=/\s+/')
39
+ tsv = TSV.open(filename, :sep => /\s+/)
40
40
 
41
- assert_equal %w(a), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA").collect{|annot,pvalue| pvalue < 0.05 ? annot : nil}.compact
42
- assert_equal %w(aa aaa), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA").collect{|annot,pvalue| pvalue > 0.05 ? annot : nil}.compact
41
+ assert_equal %w(a), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA", :fdr => false).collect{|annot,pvalue| pvalue < 0.05 ? annot : nil}.compact
43
42
  end
44
43
  end
45
44
  end
@@ -0,0 +1,43 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../../test_helper')
2
+ require 'rbbt/vector/model/svm'
3
+ require 'rbbt/util/R'
4
+ require 'test/unit'
5
+
6
# Trains an SVMModel on a tiny data set and checks its predictions.
class TestSVMModel < Test::Unit::TestCase

  def test_model
    # One sample per line: label, then the element the features come from
    text =<<-EOF
1 0;1;1
1 1;0;1
1 1;1;1
1 0;1;1
1 1;1;1
0 0;1;0
0 1;0;0
0 0;1;0
0 1;0;0
    EOF

    TmpFile.with_file() do |dir|
      FileUtils.mkdir_p dir
      model = SVMModel.new(dir)

      model.extract_features = Proc.new{|element| element.split(";") }

      text.split(/\n/).each do |line|
        label, features = line.split(" ")
        model.add(features, label)
      end

      model.train

      assert model.eval("1;1;1") > 0.5
      assert model.eval("0;0;0") < 0.5

      predictions = model.eval_list(%w(1;1;1 0;0;0))
      assert_equal [true, false], predictions.collect{|v| v > 0.5}
    end
  end

end
@@ -0,0 +1,74 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'rbbt/vector/model'
3
+ require 'rbbt/util/R'
4
+ require 'test/unit'
5
+
6
# Trains a VectorModel whose training and evaluation steps are Procs that
# run R, then checks its predictions.
class TestVectorModel < Test::Unit::TestCase

  def test_model
    # One sample per line: label, then the element the features come from
    text =<<-EOF
1 0;1;1
1 1;0;1
1 1;1;1
1 0;1;1
1 1;1;1
0 0;1;0
0 1;0;0
0 0;1;0
0 1;0;0
    EOF

    TmpFile.with_file() do |dir|
      FileUtils.mkdir_p dir
      model = VectorModel.new(dir)

      model.extract_features = Proc.new{|element| element.split(";") }

      model.train_model = Proc.new{|model_file,features,labels|
        TmpFile.with_file do |feature_file|
          Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
          Open.write(feature_file + '.class', labels * "\n")
          R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
labels = scan("#{ feature_file }.class", what=numeric());
features = cbind(features, class = labels);
library(e1071)
model = svm(class ~ ., data = features)
save(model, file="#{ model_file }");
          EOF
        end
      }

      model.eval_model = Proc.new{|model_file,features|
        TmpFile.with_file do |feature_file|
          TmpFile.with_file do |results|
            Open.write(feature_file, features * "\t")
            script =<<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
library(e1071)
load(file="#{ model_file }")
label = predict(model, features);
cat(label, file="#{results}");
            EOF
            # Show whatever R printed, then read the prediction back from the file
            puts R.run(script).read
            Open.read(results)
          end
        end
      }

      text.split(/\n/).each do |line|
        label, features = line.split(" ")
        model.add(features, label)
      end

      model.train

      assert model.eval("1;1;1").to_f > 0.5
      assert model.eval("0;0;0").to_f < 0.5
    end
  end

end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
5
- prerelease: false
4
+ hash: 27
5
+ prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 1
10
- version: 0.0.1
9
+ - 2
10
+ version: 0.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-12-20 00:00:00 +01:00
18
+ date: 2011-10-04 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -32,6 +32,34 @@ dependencies:
32
32
  version: "0"
33
33
  type: :runtime
34
34
  version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rbbt-entities
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 3
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ type: :runtime
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: RubyInline
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :runtime
62
+ version_requirements: *id003
35
63
  description: Data-mining and statistics
36
64
  email: miguel.vazquez@fdi.ucm.es
37
65
  executables: []
@@ -42,10 +70,17 @@ extra_rdoc_files:
42
70
  - LICENSE
43
71
  files:
44
72
  - LICENSE
73
+ - lib/rbbt/network/paths.rb
45
74
  - lib/rbbt/statistics/fdr.rb
46
75
  - lib/rbbt/statistics/hypergeometric.rb
76
+ - lib/rbbt/statistics/random_walk.rb
77
+ - lib/rbbt/vector/model.rb
78
+ - lib/rbbt/vector/model/svm.rb
47
79
  - test/rbbt/statistics/test_fdr.rb
48
80
  - test/rbbt/statistics/test_hypergeometric.rb
81
+ - test/rbbt/vector/test_model.rb
82
+ - test/rbbt/vector/model/test_svm.rb
83
+ - test/rbbt/network/test_paths.rb
49
84
  - test/test_helper.rb
50
85
  has_rdoc: true
51
86
  homepage: http://github.com/mikisvaz/rbbt-phgx
@@ -77,11 +112,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
112
  requirements: []
78
113
 
79
114
  rubyforge_project:
80
- rubygems_version: 1.3.7
115
+ rubygems_version: 1.6.2
81
116
  signing_key:
82
117
  specification_version: 3
83
118
  summary: Data-mining and statistics
84
119
  test_files:
85
120
  - test/rbbt/statistics/test_fdr.rb
86
121
  - test/rbbt/statistics/test_hypergeometric.rb
122
+ - test/rbbt/vector/test_model.rb
123
+ - test/rbbt/vector/model/test_svm.rb
124
+ - test/rbbt/network/test_paths.rb
87
125
  - test/test_helper.rb