rbbt-dm 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,136 @@
1
+ require 'priority_queue'
2
# Shortest-path searches over an adjacency structure (anything answering
# +include?+ and +[]+, e.g. a Hash or a TSV).
#
# All three public searches share the same contract:
#
# * they return +nil+ when +start_node+ is not a key of +adjacency+;
# * with no +end_node+ they return the +parents+ hash (node => predecessor)
#   describing the explored tree;
# * with an +end_node+ they return the path as an Array that starts at the
#   target and ends at +start_node+, or +nil+ when no target was reached.
#
# +end_node+ may be a single String or a collection of candidate targets
# (anything supporting +include?+ and <tt>&</tt>, such as an Array or a Set).
module Paths

  # Shortest path where every edge costs 1.
  #
  # <tt>adjacency[node]</tt> is expected to be the list of neighbours of
  # +node+; nodes without an entry are treated as having no neighbours.
  def self.dijkstra(adjacency, start_node, end_node = nil)
    search(adjacency, start_node, end_node) do |node|
      neighbours = adjacency[node]
      neighbours.nil? ? [] : neighbours.collect { |neighbour| [neighbour, 1] }
    end
  end

  # Shortest path where each edge carries its own cost.
  #
  # <tt>adjacency[node]</tt> is passed to +Misc.zip_fields+, which must
  # yield <tt>[neighbour, cost]</tt> pairs.
  def self.weighted_dijkstra(adjacency, start_node, end_node = nil)
    search(adjacency, start_node, end_node) do |node|
      weighted_edges(adjacency, node)
    end
  end

  # Like +weighted_dijkstra+, but every edge cost is multiplied by
  # <tt>l + rand</tt> each time the edge is examined, so repeated calls can
  # return different paths.
  def self.random_weighted_dijkstra(adjacency, l, start_node, end_node = nil)
    search(adjacency, start_node, end_node) do |node|
      weighted_edges(adjacency, node).collect do |neighbour, cost|
        [neighbour, cost * (l + rand)]
      end
    end
  end

  # Core search shared by the public entry points. The block receives a
  # node and must return its outgoing edges as <tt>[neighbour, cost]</tt>
  # pairs.
  def self.search(adjacency, start_node, end_node)
    return nil unless adjacency.include? start_node

    infinity  = 1.0 / 0.0
    active    = PriorityQueue.new
    distances = Hash.new { infinity }
    parents   = {}

    active[start_node] = 0
    best = infinity # distance of the closest target found so far
    until active.empty?
      u, distance = active.delete_min
      distances[u] = distance

      edges = yield(u)
      edges.each do |v, cost|
        d = distance + cost
        # Skip edges that neither improve v nor can beat the best target.
        next unless d < distances[v] && d < best
        active[v] = distances[v] = d
        parents[v] = u
        best = d if target?(end_node, v)
      end
    end

    return parents unless end_node
    trace_path(parents, distances, start_node, end_node)
  end

  # Edges of +node+ as <tt>[neighbour, cost]</tt> pairs; empty when the node
  # has no (or an empty) adjacency entry.
  def self.weighted_edges(adjacency, node)
    return [] unless adjacency.include?(node)
    entry = adjacency[node]
    return [] if entry.nil? || entry.empty?
    Misc.zip_fields(entry)
  end

  # True when +node+ is (one of) the requested target(s). Without a target
  # nothing matches, so the search explores everything reachable.
  def self.target?(end_node, node)
    return false unless end_node
    String === end_node ? end_node == node : end_node.include?(node)
  end

  # Walks +parents+ back from the target to +start_node+. When several
  # candidate targets were reached, the one with the smallest recorded
  # distance is used.
  def self.trace_path(parents, distances, start_node, end_node)
    unless String === end_node
      end_node = (end_node & parents.keys).min_by { |node| distances[node] }
    end
    return nil unless parents.include? end_node

    path = [end_node]
    path << parents[path.last] until path.last === start_node
    path
  end

  private_class_method :search, :weighted_edges, :target?, :trace_path
end
109
+
110
module Entity
  # Path-finding helpers for entities that act as network nodes. The
  # methods work both on a single entity and on an Array of entities.
  module Adjacent

    # Path from this entity (or from each entity of an Array) to +entities+.
    # Flat adjacencies go through Paths.dijkstra, anything else through
    # Paths.weighted_dijkstra.
    def path_to(adjacency, entities)
      return collect { |gene| gene.path_to(adjacency, entities) } if Array === self

      if adjacency.type == :flat
        Paths.dijkstra(adjacency, self, entities)
      else
        Paths.weighted_dijkstra(adjacency, self, entities)
      end
    end

    # Runs Paths.random_weighted_dijkstra +times+ times and returns the list
    # of results; for an Array the per-entity lists are concatenated.
    def random_paths_to(adjacency, l, times, entities)
      unless Array === self
        return times.times.collect do
          Paths.random_weighted_dijkstra(adjacency, l, self, entities)
        end
      end

      inject([]) do |all_paths, gene|
        all_paths + gene.random_paths_to(adjacency, l, times, entities)
      end
    end
  end
end
@@ -1,5 +1,8 @@
1
1
  require 'inline'
2
- require 'rbbt/util/tsv'
2
+ require 'rbbt/tsv'
3
+ require 'rbbt/persist'
4
+ require 'rbbt/statistics/fdr'
5
+ require 'rbbt/entity'
3
6
 
4
7
  module Hypergeometric
5
8
  class << self
@@ -88,49 +91,47 @@ double hypergeometric(double total, double support, double list, double found)
88
91
  end
89
92
  end
90
93
 
91
- class TSV
94
+ module TSV
92
95
 
93
- def annotation_counts(fields = nil)
96
+ def annotation_counts(fields = nil, persistence = false)
94
97
  fields ||= self.fields
95
98
  fields = [fields] if String === fields or Symbol === fields
96
99
 
97
- annotation_count_cache_file = TSV.get_persistence_file(File.basename(filename) + "_" + fields.inspect, File.expand_path(File.dirname(filename)))
98
-
99
- if File.exists?(annotation_count_cache_file)
100
- Log.low "Loading annotation counts from #{ annotation_count_cache_file }"
101
- TCHash.get(annotation_count_cache_file)
102
- else
103
- Log.low "Saving annotation counts to #{ annotation_count_cache_file }"
104
- hash = TCHash.get(annotation_count_cache_file)
105
-
106
- counts = Hash.new(0)
107
- through :main, fields do |key, values|
108
- values.flatten.compact.uniq.each{|value| counts[value] += 1}
100
+ Persist.persist(filename, :marshal, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
101
+ data ||= Hash.new(0)
102
+ through :key, fields do |key, values|
103
+ values.flatten.compact.uniq.each{|value| data[value] += 1}
109
104
  end
110
- hash.merge! counts
105
+
106
+ data
111
107
  end
112
108
  end
113
109
 
114
- def enrichment(list, fields, options = {})
115
- options = Misc.add_defaults options, :min_support => 3
110
+ def enrichment(list, fields = nil, options = {})
111
+ fields ||= self.fields.first
112
+ options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false
116
113
  Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
117
- selected = select :main => list
118
-
114
+
115
+ selected = select :key => list
116
+
119
117
  tsv_size = keys.length
120
118
  total = selected.keys.length
121
119
  Log.debug "Found #{total} of #{list.length} entities"
122
120
 
123
- counts = annotation_counts fields
121
+ counts = annotation_counts fields, options[:persist]
124
122
 
125
123
  annotations = Hash.new 0
126
- selected.through :main, fields do |key, values|
127
- values.flatten.compact.uniq.each{|value| annotations[value] += 1}
124
+ with_unnamed do
125
+ selected.through :key, fields do |key, values|
126
+ values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
127
+ annotations[value] += 1
128
+ }
129
+ end
128
130
  end
129
131
 
130
132
  pvalues = {}
131
133
  annotations.each do |annotation, count|
132
- Log.debug "Hypergeometric: #{ annotation } - #{[tsv_size, counts[annotation], total, count].inspect}"
133
- next if count < options[:min_support]
134
+ next if count < options[:min_support] or not counts.include? annotation
134
135
  pvalue = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
135
136
  pvalues[annotation] = pvalue
136
137
  end
@@ -138,9 +139,30 @@ class TSV
138
139
  FDR.adjust_hash! pvalues if options[:fdr]
139
140
  pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
140
141
 
142
+ TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
143
+
141
144
  pvalues
142
145
  end
146
+
147
# Enrichment of this TSV's keys against +field+ of another +tsv+.
#
# +tsv+ may be a Path, in which case it is loaded with +tsv+ first. The keys
# of self are translated through the index returned by TSV.find_traversal
# and handed to <tt>tsv.enrichment</tt> together with +field+ and +options+.
# Raises a RuntimeError when no traversal index is found.
def enrichment_for(tsv, field, options = {})
  table = Path === tsv ? tsv.tsv : tsv

  traversal = TSV.find_traversal(self, table, :in_namespace => false, :persist_input => true)
  raise "Cannot traverse identifiers" if traversal.nil?

  translated_keys = traversal.values_at(*keys).flatten.compact.uniq
  table.enrichment(translated_keys, field, options)
end
143
157
  end
144
158
 
159
module Entity
  # Adds enrichment analysis to entity lists.
  module Enriched
    # Delegates to <tt>file.enrichment</tt>, passing this entity list plus
    # +fields+ and +options+. A Path argument is loaded with +tsv+ first.
    def enrichment(file, fields = nil, options = {})
      table = Path === file ? file.tsv : file
      table.enrichment(self, fields, options)
    end
  end
end
145
167
 
146
168
 
@@ -0,0 +1,158 @@
1
+ require 'png'
2
+ require 'inline'
3
+
4
# Rank-based enrichment scoring (a weighted random walk over a ranked list)
# with permutation p-values and a simple PNG rendering of the hits.
module RandomWalk

  class << self
    inline do |builder|

      # Weight of a hit at +position+ in a list whose midpoint is +mean+:
      # grows with the relative distance from the midpoint.
      # NOTE(review): divides by +mean+, so a list with total < 2 (mean == 0)
      # is not handled here -- confirm callers never pass one.
      builder.c_raw <<-'EOC'
        double weight(int position, int mean){
          double rel_pos = (double) abs(position - mean) / mean;
          double weight =  0.3 *  0.5 * rel_pos +  0.7 * (exp(30*rel_pos)/exp(30));
          return(weight);
        }
      EOC

      # Score for the sorted hit +positions+ in a list of +total+ elements,
      # penalising +missing+ hits. Returns the largest positive deviation, or
      # minus the largest negative deviation when that one dominates.
      #
      # Uses RARRAY_LEN / NUM2DBL rather than direct RARRAY()/RFLOAT() struct
      # access, which is not available on Ruby >= 1.9.
      builder.c <<-'EOC'
        double fast_score_scale(VALUE positions, int total, int missing){
          int idx;

          int mean = total / 2;

          VALUE rel_q = rb_ary_new();
          VALUE rel_l = rb_ary_new();

          rb_ary_push(rel_q,rb_float_new(0));

          // Rescale positions and accumulate weights
          double total_weights = 0;
          for (idx = 0; idx < RARRAY_LEN(positions); idx++){
            int position = FIX2INT(rb_ary_entry(positions, idx));

            rb_ary_push(rel_l, rb_float_new((double) position / total));

            total_weights += weight(position, mean);
            rb_ary_push(rel_q, rb_float_new(total_weights));
          }

          // Add penalty for missing genes
          double penalty = missing * weight(mean * 0.8, mean);
          total_weights  = total_weights + penalty;

          // Traverse list and get extreme values
          double max_top, max_bottom;
          max_top = max_bottom = 0;
          for (idx = 0; idx < RARRAY_LEN(positions); idx++){
            double top    = NUM2DBL(rb_ary_entry(rel_q, idx + 1)) / total_weights -
                            NUM2DBL(rb_ary_entry(rel_l, idx));
            double bottom = - (penalty + NUM2DBL(rb_ary_entry(rel_q, idx))) / total_weights +
                            NUM2DBL(rb_ary_entry(rel_l, idx));

            if (top > max_top)       max_top    = top;
            if (bottom > max_bottom) max_bottom = bottom;
          }

          if (max_top > max_bottom) return max_top;
          else                      return -max_bottom;
        }
      EOC

    end
  end

  class << self
    alias score fast_score_scale
  end

  # Combines the scores of the up and down sets: whichever is non-zero when
  # the other is zero, otherwise +up - down+.
  # NOTE(review): with up == 0 this returns +down+, not +-down+ -- confirm
  # the sign is intended.
  def self.combine(up, down)
    return down if up == 0
    return up if down == 0

    up - down
  end

  # Scores the +up+ and +down+ position lists separately and combines them.
  def self.score_up_down(up, down, total, missing = 0)
    scores_up   = score(up, total, missing)
    scores_down = score(down, total, missing)

    combine(scores_up, scores_down)
  end

  # Two sided
  #
  # Absolute scores of +times+ random position sets of the given +size+;
  # all zeros when +size+ is 0.
  def self.permutations(size, total, missing = 0, times = 10000)
    if size == 0
      [0] * times
    else
      (1..times).collect do
        score(Array.new(size){ (rand * total).to_i }.sort, total, missing).abs
      end
    end
  end

  # Same as +permutations+ for a pair of random up/down sets.
  def self.permutations_up_down(size_up, size_down, total, missing = 0, times = 10000)
    (1..times).collect do
      score_up_down(Array.new(size_up){ (rand * total).to_i }.sort, Array.new(size_down){ (rand * total).to_i }.sort, total, missing).abs
    end
  end

  # Fraction of +permutations+ strictly greater than the absolute +score+.
  # An empty +permutations+ list yields NaN (0.0 / 0).
  def self.pvalue(permutations, score)
    score = score.abs
    permutations.count { |per| per > score }.to_f / permutations.length
  end

  COLORS = {
    :red =>   PNG::Color::Red,
    :green => PNG::Color::Green,
    :white => PNG::Color::White,
    :black => PNG::Color::Black,

  }

  # Draws one vertical black line per hit on a +size+ x +width+ canvas.
  #
  # hits::     1-based positions; rescaled when +size+ is smaller than +total+.
  # filename:: where to save the PNG; when nil the image blob is returned.
  # options::  :size (default +total+, capped at +total+), :bg_color (a key
  #            of COLORS, default :white), :width (default 20) and :sections
  #            (color => [start, finish] ranges painted before the hits).
  def self.draw_hits(hits, total, filename = nil, options = {})

    size = options[:size] || total
    bg_color = options[:bg_color] || :white
    width = options[:width] || 20
    sections = options[:sections] || []

    size = [size, total].min

    hits = hits.collect{|h| h - 1}
    if size < total
      hits = hits.collect{|h| (h.to_f * size / total).to_i}
    end

    canvas = PNG::Canvas.new size, width, COLORS[bg_color]

    sections.each{|color, info|
      start = info[0]
      finish = info[1]
      (start..finish).each{|x|
        (0..width - 1).each{|y|
          canvas[x,y] = COLORS[color]
        }
      }
    }

    hits.each{|hit|
      canvas.line hit, 0, hit , width - 1, PNG::Color::Black
    }

    png = PNG.new canvas

    if filename
      png.save filename
    else
      png.to_blob
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'rbbt/vector/model'
2
# VectorModel preconfigured with R snippets that train and evaluate a
# support vector machine through the e1071 R package.
class SVMModel < VectorModel
  # dir:: directory handed to VectorModel#initialize.
  def initialize(dir)
    super(dir)

    # Elements are used as feature vectors unchanged.
    @extract_features = Proc.new do |element|
      element
    end

    # R code run at training time; it reads +features+ and defines +model+.
    @train_model = <<-EOF
library(e1071);
model = svm(class ~ ., data = features);
    EOF

    # R code run at evaluation time; it reads +model+ and +features+ and
    # defines +label+.
    @eval_model = <<-EOF
library(e1071);
label = predict(model, features);
    EOF
  end
end
@@ -0,0 +1,122 @@
1
+ require 'rbbt/util/R'
2
+
3
# A trainable model over feature vectors.
#
# Three pluggable pieces drive it:
#
# extract_features:: callable turning an element into a feature vector.
# train_model::      either a callable invoked as
#                    <tt>call(model_file, features, labels)</tt> or a String
#                    of R code run through VectorModel.R_train.
# eval_model::       either a callable invoked as
#                    <tt>call(model_file, features, list)</tt> (+list+ tells
#                    whether +features+ is a list of vectors) or a String of
#                    R code run through VectorModel.R_eval.
class VectorModel
  attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
  attr_accessor :features, :labels

  # Trains in R: +features+ (one tab-separated row per vector) and +labels+
  # are written to temporary files and loaded as an R data frame named
  # +features+ with a +class+ column. +code+ must define +model+, which is
  # then saved to +model_file+.
  def self.R_train(model_file, features, labels, code)
    TmpFile.with_file do |feature_file|
      Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
      Open.write(feature_file + '.class', labels * "\n")

      R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
labels = scan("#{ feature_file }.class");
features = cbind(features, class = labels);
#{code}
save(model, file='#{model_file}')
      EOF
    end
  end

  # Evaluates in R: loads the model from +model_file+, runs +code+ (which
  # must define +label+) and parses the printed labels as floats.
  #
  # features:: a list of vectors when +list+ is true, a single vector
  #            otherwise.
  # Returns an Array of floats when +list+ is true, a single float otherwise.
  def self.R_eval(model_file, features, list, code)
    TmpFile.with_file do |feature_file|
      if list
        Open.write(feature_file, features.collect{|feat| feat * "\t"} * "\n" + "\n")
      else
        Open.write(feature_file, features * "\t" + "\n")
      end

      io = R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
load(file="#{model_file}");
#{code}
cat(paste(label, sep="\\n"));
      EOF

      # Drop a leading warning line, if any. No /s flag: in Ruby that
      # selects an encoding, it does not make "." match newlines.
      res = io.read.sub(/WARNING: .*?\n/, '').split(/\s+/).collect{|l| l.to_f}

      list ? res : res.first
    end
  end

  # directory:: where the model lives; created if missing. The model itself
  #             is stored in the file "model" inside it.
  # The remaining arguments initialise the corresponding accessors.
  def initialize(directory, extract_features = nil, train_model = nil, eval_model = nil)
    @directory = directory
    FileUtils.mkdir_p @directory
    @model_file = File.join(@directory, "model")
    @extract_features = extract_features
    @train_model = train_model
    @eval_model = eval_model
    @features = []
    @labels = []
  end

  # Stores the features of +element+ and, when given, its +label+.
  def add(element, label = nil)
    @features << extract_features.call(element)
    @labels << label unless label.nil?
  end

  # Trains on everything added so far. Does nothing (returns nil) when
  # +train_model+ is neither a Proc nor a String.
  def train
    case train_model
    when Proc
      train_model.call(@model_file, @features, @labels)
    when String
      self.class.R_train(@model_file, @features, @labels, train_model)
    end
  end

  # Prediction for a single +element+.
  def eval(element)
    case eval_model
    when Proc
      eval_model.call(@model_file, extract_features.call(element), false)
    when String
      self.class.R_eval(@model_file, extract_features.call(element), false, eval_model)
    end
  end

  # Predictions for a list of +elements+. Pass <tt>extract = false</tt> when
  # +elements+ are already feature vectors.
  def eval_list(elements, extract = true)
    vectors = extract ? elements.collect{|element| extract_features.call(element)} : elements

    case eval_model
    when Proc
      eval_model.call(@model_file, vectors, true)
    when String
      self.class.R_eval(@model_file, vectors, true, eval_model)
    end
  end

  # Repeated random hold-out validation: +folds+ times, a random chunk of
  # <tt>features.length / folds</tt> elements is held out, the model is
  # trained on the rest and evaluated on the chunk.
  #
  # Returns one accuracy per round (a Float in 0..1): the fraction of
  # held-out elements whose prediction is within 0.5 of its label. The
  # stored features and labels are restored afterwards, even on error.
  #
  # Raises ArgumentError when there are fewer elements than folds.
  def cross_validation(folds = 10)
    chunk_size = features.length / folds
    if chunk_size == 0
      raise ArgumentError, "Not enough elements (#{features.length}) for #{folds} folds"
    end

    saved_features = @features
    saved_labels = @labels
    seq = (0..saved_features.length - 1).to_a

    acc = []
    begin
      folds.times do
        seq = seq.shuffle
        # Exclusive range: the held-out chunk must not overlap the training set.
        eval_chunk = seq[0...chunk_size]
        train_chunk = seq[chunk_size..-1]

        eval_features = saved_features.values_at(*eval_chunk)
        eval_labels = saved_labels.values_at(*eval_chunk)

        @features = saved_features.values_at(*train_chunk)
        @labels = saved_labels.values_at(*train_chunk)

        train
        predictions = eval_list eval_features, false

        hits = predictions.zip(eval_labels).select{|pred,lab| (pred.to_f - lab.to_f).abs < 0.5}.length
        acc << hits.to_f / eval_chunk.length
      end
    ensure
      @features = saved_features
      @labels = saved_labels
    end

    acc
  end
end
@@ -0,0 +1,68 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'rbbt/network/paths'
3
+ require 'test/unit'
4
+ require 'rbbt/sources/string'
5
+ require 'set'
6
+
7
+
8
# Exercises the three Paths searches on the STRING protein-protein network.
class TestNetwork < Test::Unit::TestCase
  START_NODE = "ENSP00000256078"
  END_NODE   = "ENSP00000306245"

  def test_dijsktra
    network = STRING.protein_protein.tsv :persist => false, :fields => ["Interactor Ensembl Protein ID"], :type => :flat
    network.unnamed = true

    assert_joins_endpoints Paths.dijkstra(network, START_NODE, [END_NODE])
  end

  def test_weighted_dijsktra
    network = scored_network

    # Single target given as a String, then as a Set of targets.
    assert_joins_endpoints Paths.weighted_dijkstra(network, START_NODE, END_NODE)
    assert_joins_endpoints Paths.weighted_dijkstra(network, START_NODE, Set.new([END_NODE]))
  end

  def test_random_weighted_dijsktra
    network = scored_network

    assert_joins_endpoints Paths.random_weighted_dijkstra(network, 0.8, START_NODE, END_NODE)
  end

  private

  # STRING network with each "Score" replaced by 1000 - score.
  def scored_network
    network = STRING.protein_protein.tsv

    network.process "Score" do |scores|
      scores.collect{|score| 1000 - score.to_i}
    end
    network.unnamed = true

    network
  end

  # A path must exist and contain both endpoints.
  def assert_joins_endpoints(path)
    assert path != nil
    assert path.include? START_NODE
    assert path.include? END_NODE
  end
end
67
+
68
+
@@ -17,7 +17,7 @@ row3 a C Id4
17
17
  EOF
18
18
 
19
19
  TmpFile.with_file(content) do |filename|
20
- tsv = TSV.new(filename + '#:sep=/\s+/')
20
+ tsv = TSV.open(filename, :sep => /\s+/)
21
21
  counts = tsv.annotation_counts
22
22
  assert_equal 2, counts['a']
23
23
  end
@@ -36,10 +36,9 @@ row7 A B Id3
36
36
  EOF
37
37
 
38
38
  TmpFile.with_file(content) do |filename|
39
- tsv = TSV.new(filename + '#:sep=/\s+/')
39
+ tsv = TSV.open(filename, :sep => /\s+/)
40
40
 
41
- assert_equal %w(a), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA").collect{|annot,pvalue| pvalue < 0.05 ? annot : nil}.compact
42
- assert_equal %w(aa aaa), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA").collect{|annot,pvalue| pvalue > 0.05 ? annot : nil}.compact
41
+ assert_equal %w(a), tsv.enrichment(%w(row1 row3 row4 row5), "ValueA", :fdr => false).collect{|annot,pvalue| pvalue < 0.05 ? annot : nil}.compact
43
42
  end
44
43
  end
45
44
  end
@@ -0,0 +1,43 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../../test_helper')
2
+ require 'rbbt/vector/model/svm'
3
+ require 'rbbt/util/R'
4
+ require 'test/unit'
5
+
6
# Trains an SVMModel on a tiny labelled set and checks its predictions.
class TestSVMModel < Test::Unit::TestCase

  def test_model
    # One example per line: label, then ";"-separated features.
    examples = <<-EOF
1 0;1;1
1 1;0;1
1 1;1;1
1 0;1;1
1 1;1;1
0 0;1;0
0 1;0;0
0 0;1;0
0 1;0;0
    EOF

    TmpFile.with_file() do |dir|
      FileUtils.mkdir_p dir
      model = SVMModel.new(dir)

      model.extract_features = Proc.new do |element|
        element.split(";")
      end

      examples.split(/\n/).each do |example|
        label, features = example.split(" ")
        model.add(features, label)
      end

      model.train

      assert model.eval("1;1;1") > 0.5
      assert model.eval("0;0;0") < 0.5

      above_half = model.eval_list(%w(1;1;1 0;0;0)).collect{|v| v > 0.5}
      assert_equal [true, false], above_half
    end
  end

end
@@ -0,0 +1,74 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'rbbt/vector/model'
3
+ require 'rbbt/util/R'
4
+ require 'test/unit'
5
+
6
# Drives a VectorModel with hand-written Proc trainers/evaluators that call
# R (e1071 svm) directly.
class TestVectorModel < Test::Unit::TestCase

  def test_model
    # One example per line: label, then ";"-separated features.
    examples = <<-EOF
1 0;1;1
1 1;0;1
1 1;1;1
1 0;1;1
1 1;1;1
0 0;1;0
0 1;0;0
0 0;1;0
0 1;0;0
    EOF

    TmpFile.with_file() do |dir|
      FileUtils.mkdir_p dir
      model = VectorModel.new(dir)

      model.extract_features = Proc.new do |element|
        element.split(";")
      end

      model.train_model = Proc.new do |model_file,features,labels|
        TmpFile.with_file do |feature_file|
          rows = features.collect{|feats| feats.join("\t")}
          Open.write(feature_file, rows.join("\n"))
          Open.write(feature_file + '.class', labels.join("\n"))
          R.run <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
labels = scan("#{ feature_file }.class", what=numeric());
features = cbind(features, class = labels);
library(e1071)
model = svm(class ~ ., data = features)
save(model, file="#{ model_file }");
          EOF
        end
      end

      model.eval_model = Proc.new do |model_file,features|
        TmpFile.with_file do |feature_file|
          TmpFile.with_file do |results|
            Open.write(feature_file, features.join("\t"))
            output = R.run(<<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
library(e1071)
load(file="#{ model_file }")
label = predict(model, features);
cat(label, file="#{results}");
            EOF
            ).read
            puts output
            # The prediction is what R wrote to the results file.
            Open.read(results)
          end
        end
      end

      examples.split(/\n/).each do |example|
        label, features = example.split(" ")
        model.add(features, label)
      end

      model.train

      assert model.eval("1;1;1").to_f > 0.5
      assert model.eval("0;0;0").to_f < 0.5
    end
  end

end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
5
- prerelease: false
4
+ hash: 27
5
+ prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 1
10
- version: 0.0.1
9
+ - 2
10
+ version: 0.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-12-20 00:00:00 +01:00
18
+ date: 2011-10-04 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -32,6 +32,34 @@ dependencies:
32
32
  version: "0"
33
33
  type: :runtime
34
34
  version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rbbt-entities
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 3
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ type: :runtime
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: RubyInline
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :runtime
62
+ version_requirements: *id003
35
63
  description: Data-mining and statistics
36
64
  email: miguel.vazquez@fdi.ucm.es
37
65
  executables: []
@@ -42,10 +70,17 @@ extra_rdoc_files:
42
70
  - LICENSE
43
71
  files:
44
72
  - LICENSE
73
+ - lib/rbbt/network/paths.rb
45
74
  - lib/rbbt/statistics/fdr.rb
46
75
  - lib/rbbt/statistics/hypergeometric.rb
76
+ - lib/rbbt/statistics/random_walk.rb
77
+ - lib/rbbt/vector/model.rb
78
+ - lib/rbbt/vector/model/svm.rb
47
79
  - test/rbbt/statistics/test_fdr.rb
48
80
  - test/rbbt/statistics/test_hypergeometric.rb
81
+ - test/rbbt/vector/test_model.rb
82
+ - test/rbbt/vector/model/test_svm.rb
83
+ - test/rbbt/network/test_paths.rb
49
84
  - test/test_helper.rb
50
85
  has_rdoc: true
51
86
  homepage: http://github.com/mikisvaz/rbbt-phgx
@@ -77,11 +112,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
112
  requirements: []
78
113
 
79
114
  rubyforge_project:
80
- rubygems_version: 1.3.7
115
+ rubygems_version: 1.6.2
81
116
  signing_key:
82
117
  specification_version: 3
83
118
  summary: Data-mining and statistics
84
119
  test_files:
85
120
  - test/rbbt/statistics/test_fdr.rb
86
121
  - test/rbbt/statistics/test_hypergeometric.rb
122
+ - test/rbbt/vector/test_model.rb
123
+ - test/rbbt/vector/model/test_svm.rb
124
+ - test/rbbt/network/test_paths.rb
87
125
  - test/test_helper.rb