nimbus 0.7 → 0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/nimbus/application.rb +14 -2
- data/lib/nimbus/configuration.rb +5 -2
- data/lib/nimbus/forest.rb +16 -2
- data/lib/nimbus/tree.rb +39 -11
- metadata +2 -2
data/lib/nimbus/application.rb
CHANGED
@@ -31,6 +31,7 @@ module Nimbus
|
|
31
31
|
output_random_forest_file(@forest)
|
32
32
|
output_tree_errors_file(@forest)
|
33
33
|
output_training_file_predictions(@forest)
|
34
|
+
output_snp_importances_file(@forest)
|
34
35
|
end
|
35
36
|
|
36
37
|
if @config.do_testing
|
@@ -102,7 +103,7 @@ module Nimbus
|
|
102
103
|
}
|
103
104
|
}
|
104
105
|
Nimbus.message "* Predictions for the training sample saved to:"
|
105
|
-
Nimbus.message "* Output
|
106
|
+
Nimbus.message "* Output from training file: #{@config.output_training_file}"
|
106
107
|
Nimbus.message "*" * 50
|
107
108
|
end
|
108
109
|
|
@@ -113,7 +114,18 @@ module Nimbus
|
|
113
114
|
}
|
114
115
|
}
|
115
116
|
Nimbus.message "* Predictions for the testing set saved to:"
|
116
|
-
Nimbus.message "* Output
|
117
|
+
Nimbus.message "* Output from testing file: #{@config.output_testing_file}"
|
118
|
+
Nimbus.message "*" * 50
|
119
|
+
end
|
120
|
+
|
121
|
+
def output_snp_importances_file(forest)
|
122
|
+
File.open(@config.output_snp_importances_file , 'w') {|f|
|
123
|
+
forest.snp_importances.sort.each{|p|
|
124
|
+
f.write("SNP ##{p[0]}: #{p[1].round(5)}\n")
|
125
|
+
}
|
126
|
+
}
|
127
|
+
Nimbus.message "* SNP importances for the forest saved to:"
|
128
|
+
Nimbus.message "* Output snp importance file: #{@config.output_snp_importances_file}"
|
117
129
|
Nimbus.message "*" * 50
|
118
130
|
end
|
119
131
|
|
data/lib/nimbus/configuration.rb
CHANGED
@@ -17,7 +17,8 @@ module Nimbus
|
|
17
17
|
:output_forest_file,
|
18
18
|
:output_training_file,
|
19
19
|
:output_testing_file,
|
20
|
-
:output_tree_errors_file
|
20
|
+
:output_tree_errors_file,
|
21
|
+
:output_snp_importances_file
|
21
22
|
)
|
22
23
|
|
23
24
|
DEFAULTS = {
|
@@ -37,7 +38,8 @@ module Nimbus
|
|
37
38
|
:output_forest_file => 'random_forest.yml',
|
38
39
|
:output_training_file => 'training_file_predictions.txt',
|
39
40
|
:output_testing_file => 'testing_file_predictions.txt',
|
40
|
-
:output_tree_errors_file => 'generalization_errors.txt'
|
41
|
+
:output_tree_errors_file => 'generalization_errors.txt',
|
42
|
+
:output_snp_importances_file => 'snp_importances.txt'
|
41
43
|
}
|
42
44
|
|
43
45
|
|
@@ -56,6 +58,7 @@ module Nimbus
|
|
56
58
|
@output_training_file = File.expand_path(DEFAULTS[:output_training_file], Dir.pwd)
|
57
59
|
@output_testing_file = File.expand_path(DEFAULTS[:output_testing_file], Dir.pwd)
|
58
60
|
@output_tree_errors_file = File.expand_path(DEFAULTS[:output_tree_errors_file], Dir.pwd)
|
61
|
+
@output_snp_importances_file = File.expand_path(DEFAULTS[:output_snp_importances_file], Dir.pwd)
|
59
62
|
end
|
60
63
|
|
61
64
|
def tree
|
data/lib/nimbus/forest.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Nimbus
|
2
2
|
|
3
3
|
class Forest
|
4
|
-
attr_accessor :size, :trees, :bag, :predictions, :tree_errors
|
4
|
+
attr_accessor :size, :trees, :bag, :predictions, :tree_errors, :snp_importances
|
5
5
|
attr_accessor :options
|
6
6
|
|
7
7
|
def initialize(config)
|
@@ -10,7 +10,9 @@ module Nimbus
|
|
10
10
|
@options = config
|
11
11
|
@size = config.forest_size
|
12
12
|
@predictions = {}
|
13
|
-
@times_predicted =[]
|
13
|
+
@times_predicted = []
|
14
|
+
@snp_importances = {}
|
15
|
+
@tree_snp_importances = []
|
14
16
|
raise Nimbus::ForestError, "Forest size parameter (#{@size}) is invalid. You need at least one tree." if @size < 1
|
15
17
|
end
|
16
18
|
|
@@ -22,9 +24,11 @@ module Nimbus
|
|
22
24
|
tree = Tree.new @options.tree
|
23
25
|
@trees << tree.seed(@options.training_set.individuals, tree_individuals_bag, @options.training_set.ids_fenotypes)
|
24
26
|
@tree_errors << tree.generalization_error_from_oob(tree_out_of_bag)
|
27
|
+
@tree_snp_importances << tree.estimate_importances(tree_out_of_bag)
|
25
28
|
acumulate_predictions tree.predictions
|
26
29
|
Nimbus.clear_line!
|
27
30
|
end
|
31
|
+
average_snp_importances
|
28
32
|
average_predictions
|
29
33
|
end
|
30
34
|
|
@@ -76,6 +80,16 @@ module Nimbus
|
|
76
80
|
}
|
77
81
|
end
|
78
82
|
|
83
|
+
def average_snp_importances
|
84
|
+
1.upto(@options.tree_SNP_total_count) {|snp|
|
85
|
+
@snp_importances[snp] = 0.0
|
86
|
+
@tree_snp_importances.each{|tree_snp_importance|
|
87
|
+
@snp_importances[snp] += tree_snp_importance[snp] unless tree_snp_importance[snp].nil?
|
88
|
+
}
|
89
|
+
@snp_importances[snp] = @snp_importances[snp] / @size
|
90
|
+
}
|
91
|
+
end
|
92
|
+
|
79
93
|
end
|
80
94
|
|
81
95
|
end
|
data/lib/nimbus/tree.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Nimbus
|
2
2
|
|
3
3
|
class Tree
|
4
|
-
attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :structure, :generalization_error, :predictions
|
4
|
+
attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :used_snps, :structure, :generalization_error, :predictions, :importances
|
5
5
|
attr_accessor :individuals, :id_to_fenotype
|
6
6
|
|
7
7
|
def initialize(options)
|
@@ -14,19 +14,10 @@ module Nimbus
|
|
14
14
|
@individuals = all_individuals
|
15
15
|
@id_to_fenotype = ids_fenotypes
|
16
16
|
@predictions = {}
|
17
|
+
@used_snps = []
|
17
18
|
|
18
19
|
@structure = build_node individuals_sample, Nimbus::LossFunctions.average(individuals_sample, @id_to_fenotype)
|
19
20
|
end
|
20
|
-
|
21
|
-
def generalization_error_from_oob(oob_ids)
|
22
|
-
return nil if (@structure.nil? || @individuals.nil? || @id_to_fenotype.nil?)
|
23
|
-
oob_y_hat = Nimbus::LossFunctions.average(oob_ids, @id_to_fenotype)
|
24
|
-
oob_predictions = {}
|
25
|
-
oob_ids.each do |oobi|
|
26
|
-
oob_predictions[oobi] = Tree.traverse @structure, individuals[oobi].snp_list
|
27
|
-
end
|
28
|
-
@generalization_error = Nimbus::LossFunctions.quadratic_loss oob_ids, oob_predictions, oob_y_hat
|
29
|
-
end
|
30
21
|
|
31
22
|
def build_node(individuals_ids, y_hat)
|
32
23
|
# General loss function value for the node
|
@@ -60,8 +51,35 @@ module Nimbus
|
|
60
51
|
node_1 = split[1].size == 0 ? label_node(parent_y_hat, []) : build_node(split[1], y_hats[1])
|
61
52
|
node_2 = split[2].size == 0 ? label_node(parent_y_hat, []) : build_node(split[2], y_hats[2])
|
62
53
|
|
54
|
+
split_by_snp(snp)
|
63
55
|
return { snp => [node_0, node_1, node_2] }
|
64
56
|
end
|
57
|
+
|
58
|
+
def generalization_error_from_oob(oob_ids)
|
59
|
+
return nil if (@structure.nil? || @individuals.nil? || @id_to_fenotype.nil?)
|
60
|
+
oob_y_hat = Nimbus::LossFunctions.average(oob_ids, @id_to_fenotype)
|
61
|
+
oob_predictions = {}
|
62
|
+
oob_ids.each do |oobi|
|
63
|
+
oob_predictions[oobi] = Tree.traverse @structure, individuals[oobi].snp_list
|
64
|
+
end
|
65
|
+
@generalization_error = Nimbus::LossFunctions.quadratic_loss oob_ids, oob_predictions, oob_y_hat
|
66
|
+
end
|
67
|
+
|
68
|
+
def estimate_importances(oob_ids)
|
69
|
+
return nil if (@generalization_error.nil? && generalization_error_from_oob(oob_ids))
|
70
|
+
oob_individuals_count = oob_ids.size
|
71
|
+
@importances = {}
|
72
|
+
@used_snps.uniq.each do |current_snp|
|
73
|
+
shuffled_ids = oob_ids.shuffle
|
74
|
+
permutated_snp_error = 0.0
|
75
|
+
oob_ids.each_with_index {|oobi, index|
|
76
|
+
permutated_prediction = traverse_with_permutation @structure, individuals[oobi].snp_list, current_snp, individuals[shuffled_ids[index]].snp_list
|
77
|
+
permutated_snp_error += Nimbus::LossFunctions.mean_squared_error [oobi], @id_to_fenotype, permutated_prediction
|
78
|
+
}
|
79
|
+
@importances[current_snp] = ((permutated_snp_error / oob_individuals_count) - @generalization_error).round(5)
|
80
|
+
end
|
81
|
+
@importances
|
82
|
+
end
|
65
83
|
|
66
84
|
def self.traverse(tree_structure, data)
|
67
85
|
return tree_structure if tree_structure.is_a? Numeric
|
@@ -69,6 +87,12 @@ module Nimbus
|
|
69
87
|
return self.traverse( tree_structure.values.first[ data[tree_structure.keys.first - 1].to_i], data)
|
70
88
|
end
|
71
89
|
|
90
|
+
def traverse_with_permutation(tree_structure, data, snp_to_permute, individual_to_permute)
|
91
|
+
return tree_structure if tree_structure.is_a? Numeric
|
92
|
+
individual_data = (tree_structure.keys.first == snp_to_permute ? individual_to_permute : data)
|
93
|
+
return traverse_with_permutation( tree_structure.values.first[ individual_data[tree_structure.keys.first - 1].to_i], data, snp_to_permute, individual_to_permute)
|
94
|
+
end
|
95
|
+
|
72
96
|
|
73
97
|
private
|
74
98
|
|
@@ -92,6 +116,10 @@ module Nimbus
|
|
92
116
|
raise Nimbus::TreeError, "Values for SNPs columns must be in [0, 1, 2]"
|
93
117
|
end
|
94
118
|
|
119
|
+
def split_by_snp(x)
|
120
|
+
@used_snps << x
|
121
|
+
end
|
122
|
+
|
95
123
|
end
|
96
124
|
|
97
125
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: nimbus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: "0.7"
|
5
|
+
version: "0.8"
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- "Juanjo Baz\xC3\xA1n"
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2011-08-
|
14
|
+
date: 2011-08-23 00:00:00 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rspec
|