nimbus 0.7 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/nimbus/application.rb +14 -2
- data/lib/nimbus/configuration.rb +5 -2
- data/lib/nimbus/forest.rb +16 -2
- data/lib/nimbus/tree.rb +39 -11
- metadata +2 -2
data/lib/nimbus/application.rb
CHANGED
@@ -31,6 +31,7 @@ module Nimbus
|
|
31
31
|
output_random_forest_file(@forest)
|
32
32
|
output_tree_errors_file(@forest)
|
33
33
|
output_training_file_predictions(@forest)
|
34
|
+
output_snp_importances_file(@forest)
|
34
35
|
end
|
35
36
|
|
36
37
|
if @config.do_testing
|
@@ -102,7 +103,7 @@ module Nimbus
|
|
102
103
|
}
|
103
104
|
}
|
104
105
|
Nimbus.message "* Predictions for the training sample saved to:"
|
105
|
-
Nimbus.message "* Output
|
106
|
+
Nimbus.message "* Output from training file: #{@config.output_training_file}"
|
106
107
|
Nimbus.message "*" * 50
|
107
108
|
end
|
108
109
|
|
@@ -113,7 +114,18 @@ module Nimbus
|
|
113
114
|
}
|
114
115
|
}
|
115
116
|
Nimbus.message "* Predictions for the testing set saved to:"
|
116
|
-
Nimbus.message "* Output
|
117
|
+
Nimbus.message "* Output from testing file: #{@config.output_testing_file}"
|
118
|
+
Nimbus.message "*" * 50
|
119
|
+
end
|
120
|
+
|
121
|
+
def output_snp_importances_file(forest)
|
122
|
+
File.open(@config.output_snp_importances_file , 'w') {|f|
|
123
|
+
forest.snp_importances.sort.each{|p|
|
124
|
+
f.write("SNP ##{p[0]}: #{p[1].round(5)}\n")
|
125
|
+
}
|
126
|
+
}
|
127
|
+
Nimbus.message "* SNP importances for the forest saved to:"
|
128
|
+
Nimbus.message "* Output snp importance file: #{@config.output_snp_importances_file}"
|
117
129
|
Nimbus.message "*" * 50
|
118
130
|
end
|
119
131
|
|
data/lib/nimbus/configuration.rb
CHANGED
@@ -17,7 +17,8 @@ module Nimbus
|
|
17
17
|
:output_forest_file,
|
18
18
|
:output_training_file,
|
19
19
|
:output_testing_file,
|
20
|
-
:output_tree_errors_file
|
20
|
+
:output_tree_errors_file,
|
21
|
+
:output_snp_importances_file
|
21
22
|
)
|
22
23
|
|
23
24
|
DEFAULTS = {
|
@@ -37,7 +38,8 @@ module Nimbus
|
|
37
38
|
:output_forest_file => 'random_forest.yml',
|
38
39
|
:output_training_file => 'training_file_predictions.txt',
|
39
40
|
:output_testing_file => 'testing_file_predictions.txt',
|
40
|
-
:output_tree_errors_file => 'generalization_errors.txt'
|
41
|
+
:output_tree_errors_file => 'generalization_errors.txt',
|
42
|
+
:output_snp_importances_file => 'snp_importances.txt'
|
41
43
|
}
|
42
44
|
|
43
45
|
|
@@ -56,6 +58,7 @@ module Nimbus
|
|
56
58
|
@output_training_file = File.expand_path(DEFAULTS[:output_training_file], Dir.pwd)
|
57
59
|
@output_testing_file = File.expand_path(DEFAULTS[:output_testing_file], Dir.pwd)
|
58
60
|
@output_tree_errors_file = File.expand_path(DEFAULTS[:output_tree_errors_file], Dir.pwd)
|
61
|
+
@output_snp_importances_file = File.expand_path(DEFAULTS[:output_snp_importances_file], Dir.pwd)
|
59
62
|
end
|
60
63
|
|
61
64
|
def tree
|
data/lib/nimbus/forest.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Nimbus
|
2
2
|
|
3
3
|
class Forest
|
4
|
-
attr_accessor :size, :trees, :bag, :predictions, :tree_errors
|
4
|
+
attr_accessor :size, :trees, :bag, :predictions, :tree_errors, :snp_importances
|
5
5
|
attr_accessor :options
|
6
6
|
|
7
7
|
def initialize(config)
|
@@ -10,7 +10,9 @@ module Nimbus
|
|
10
10
|
@options = config
|
11
11
|
@size = config.forest_size
|
12
12
|
@predictions = {}
|
13
|
-
@times_predicted =[]
|
13
|
+
@times_predicted = []
|
14
|
+
@snp_importances = {}
|
15
|
+
@tree_snp_importances = []
|
14
16
|
raise Nimbus::ForestError, "Forest size parameter (#{@size}) is invalid. You need at least one tree." if @size < 1
|
15
17
|
end
|
16
18
|
|
@@ -22,9 +24,11 @@ module Nimbus
|
|
22
24
|
tree = Tree.new @options.tree
|
23
25
|
@trees << tree.seed(@options.training_set.individuals, tree_individuals_bag, @options.training_set.ids_fenotypes)
|
24
26
|
@tree_errors << tree.generalization_error_from_oob(tree_out_of_bag)
|
27
|
+
@tree_snp_importances << tree.estimate_importances(tree_out_of_bag)
|
25
28
|
acumulate_predictions tree.predictions
|
26
29
|
Nimbus.clear_line!
|
27
30
|
end
|
31
|
+
average_snp_importances
|
28
32
|
average_predictions
|
29
33
|
end
|
30
34
|
|
@@ -76,6 +80,16 @@ module Nimbus
|
|
76
80
|
}
|
77
81
|
end
|
78
82
|
|
83
|
+
def average_snp_importances
|
84
|
+
1.upto(@options.tree_SNP_total_count) {|snp|
|
85
|
+
@snp_importances[snp] = 0.0
|
86
|
+
@tree_snp_importances.each{|tree_snp_importance|
|
87
|
+
@snp_importances[snp] += tree_snp_importance[snp] unless tree_snp_importance[snp].nil?
|
88
|
+
}
|
89
|
+
@snp_importances[snp] = @snp_importances[snp] / @size
|
90
|
+
}
|
91
|
+
end
|
92
|
+
|
79
93
|
end
|
80
94
|
|
81
95
|
end
|
data/lib/nimbus/tree.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Nimbus
|
2
2
|
|
3
3
|
class Tree
|
4
|
-
attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :structure, :generalization_error, :predictions
|
4
|
+
attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :used_snps, :structure, :generalization_error, :predictions, :importances
|
5
5
|
attr_accessor :individuals, :id_to_fenotype
|
6
6
|
|
7
7
|
def initialize(options)
|
@@ -14,19 +14,10 @@ module Nimbus
|
|
14
14
|
@individuals = all_individuals
|
15
15
|
@id_to_fenotype = ids_fenotypes
|
16
16
|
@predictions = {}
|
17
|
+
@used_snps = []
|
17
18
|
|
18
19
|
@structure = build_node individuals_sample, Nimbus::LossFunctions.average(individuals_sample, @id_to_fenotype)
|
19
20
|
end
|
20
|
-
|
21
|
-
def generalization_error_from_oob(oob_ids)
|
22
|
-
return nil if (@structure.nil? || @individuals.nil? || @id_to_fenotype.nil?)
|
23
|
-
oob_y_hat = Nimbus::LossFunctions.average(oob_ids, @id_to_fenotype)
|
24
|
-
oob_predictions = {}
|
25
|
-
oob_ids.each do |oobi|
|
26
|
-
oob_predictions[oobi] = Tree.traverse @structure, individuals[oobi].snp_list
|
27
|
-
end
|
28
|
-
@generalization_error = Nimbus::LossFunctions.quadratic_loss oob_ids, oob_predictions, oob_y_hat
|
29
|
-
end
|
30
21
|
|
31
22
|
def build_node(individuals_ids, y_hat)
|
32
23
|
# General loss function value for the node
|
@@ -60,8 +51,35 @@ module Nimbus
|
|
60
51
|
node_1 = split[1].size == 0 ? label_node(parent_y_hat, []) : build_node(split[1], y_hats[1])
|
61
52
|
node_2 = split[2].size == 0 ? label_node(parent_y_hat, []) : build_node(split[2], y_hats[2])
|
62
53
|
|
54
|
+
split_by_snp(snp)
|
63
55
|
return { snp => [node_0, node_1, node_2] }
|
64
56
|
end
|
57
|
+
|
58
|
+
def generalization_error_from_oob(oob_ids)
|
59
|
+
return nil if (@structure.nil? || @individuals.nil? || @id_to_fenotype.nil?)
|
60
|
+
oob_y_hat = Nimbus::LossFunctions.average(oob_ids, @id_to_fenotype)
|
61
|
+
oob_predictions = {}
|
62
|
+
oob_ids.each do |oobi|
|
63
|
+
oob_predictions[oobi] = Tree.traverse @structure, individuals[oobi].snp_list
|
64
|
+
end
|
65
|
+
@generalization_error = Nimbus::LossFunctions.quadratic_loss oob_ids, oob_predictions, oob_y_hat
|
66
|
+
end
|
67
|
+
|
68
|
+
def estimate_importances(oob_ids)
|
69
|
+
return nil if (@generalization_error.nil? && generalization_error_from_oob(oob_ids))
|
70
|
+
oob_individuals_count = oob_ids.size
|
71
|
+
@importances = {}
|
72
|
+
@used_snps.uniq.each do |current_snp|
|
73
|
+
shuffled_ids = oob_ids.shuffle
|
74
|
+
permutated_snp_error = 0.0
|
75
|
+
oob_ids.each_with_index {|oobi, index|
|
76
|
+
permutated_prediction = traverse_with_permutation @structure, individuals[oobi].snp_list, current_snp, individuals[shuffled_ids[index]].snp_list
|
77
|
+
permutated_snp_error += Nimbus::LossFunctions.mean_squared_error [oobi], @id_to_fenotype, permutated_prediction
|
78
|
+
}
|
79
|
+
@importances[current_snp] = ((permutated_snp_error / oob_individuals_count) - @generalization_error).round(5)
|
80
|
+
end
|
81
|
+
@importances
|
82
|
+
end
|
65
83
|
|
66
84
|
def self.traverse(tree_structure, data)
|
67
85
|
return tree_structure if tree_structure.is_a? Numeric
|
@@ -69,6 +87,12 @@ module Nimbus
|
|
69
87
|
return self.traverse( tree_structure.values.first[ data[tree_structure.keys.first - 1].to_i], data)
|
70
88
|
end
|
71
89
|
|
90
|
+
def traverse_with_permutation(tree_structure, data, snp_to_permute, individual_to_permute)
|
91
|
+
return tree_structure if tree_structure.is_a? Numeric
|
92
|
+
individual_data = (tree_structure.keys.first == snp_to_permute ? individual_to_permute : data)
|
93
|
+
return traverse_with_permutation( tree_structure.values.first[ individual_data[tree_structure.keys.first - 1].to_i], data, snp_to_permute, individual_to_permute)
|
94
|
+
end
|
95
|
+
|
72
96
|
|
73
97
|
private
|
74
98
|
|
@@ -92,6 +116,10 @@ module Nimbus
|
|
92
116
|
raise Nimbus::TreeError, "Values for SNPs columns must be in [0, 1, 2]"
|
93
117
|
end
|
94
118
|
|
119
|
+
def split_by_snp(x)
|
120
|
+
@used_snps << x
|
121
|
+
end
|
122
|
+
|
95
123
|
end
|
96
124
|
|
97
125
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: nimbus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: "0.7"
|
5
|
+
version: "0.8"
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- "Juanjo Baz\xC3\xA1n"
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2011-08-
|
14
|
+
date: 2011-08-23 00:00:00 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rspec
|