nimbus 0.6.1 → 0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/nimbus/application.rb +12 -0
- data/lib/nimbus/configuration.rb +5 -2
- data/lib/nimbus/forest.rb +4 -2
- data/lib/nimbus/tree.rb +12 -2
- metadata +1 -1
data/lib/nimbus/application.rb
CHANGED
@@ -29,6 +29,7 @@ module Nimbus
|
|
29
29
|
@forest = ::Nimbus::Forest.new @config
|
30
30
|
@forest.grow
|
31
31
|
output_random_forest_file(@forest)
|
32
|
+
output_tree_errors_file(@forest)
|
32
33
|
output_training_file_predictions(@forest)
|
33
34
|
end
|
34
35
|
|
@@ -83,6 +84,17 @@ module Nimbus
|
|
83
84
|
Nimbus.message "*" * 50
|
84
85
|
end
|
85
86
|
|
87
|
+
# Writes the generalization error of every tree in the forest to the file
# named by @config.output_tree_errors_file, one line per tree (trees are
# numbered starting at 1), then reports the file location via Nimbus.message.
#
# forest - object whose #tree_errors returns the per-tree errors in tree order.
#
# An entry may be nil: Tree#generalization_error_from_oob returns nil when the
# tree has no structure/individuals/fenotype map. Such entries are written as
# "n/a" instead of raising NoMethodError on nil.round.
def output_tree_errors_file(forest)
  File.open(@config.output_tree_errors_file, 'w') do |f|
    # with_index(1) gives the 1-based tree number directly, no te-1 indexing.
    forest.tree_errors.each.with_index(1) do |tree_error, tree_number|
      formatted_error = tree_error.nil? ? 'n/a' : tree_error.round(5)
      f.write("generalization error for tree ##{tree_number}: #{formatted_error}\n")
    end
  end
  Nimbus.message "* Generalization errors for every tree saved to:"
  Nimbus.message "* Output tree errors file: #{@config.output_tree_errors_file}"
  Nimbus.message "*" * 50
end
|
97
|
+
|
86
98
|
def output_training_file_predictions(forest)
|
87
99
|
File.open(@config.output_training_file , 'w') {|f|
|
88
100
|
forest.predictions.sort.each{|p|
|
data/lib/nimbus/configuration.rb
CHANGED
@@ -16,7 +16,8 @@ module Nimbus
|
|
16
16
|
:training_set,
|
17
17
|
:output_forest_file,
|
18
18
|
:output_training_file,
|
19
|
-
:output_testing_file
|
19
|
+
:output_testing_file,
|
20
|
+
:output_tree_errors_file
|
20
21
|
)
|
21
22
|
|
22
23
|
DEFAULTS = {
|
@@ -35,7 +36,8 @@ module Nimbus
|
|
35
36
|
|
36
37
|
:output_forest_file => 'random_forest.yml',
|
37
38
|
:output_training_file => 'training_file_predictions.txt',
|
38
|
-
:output_testing_file => 'testing_file_predictions.txt'
|
39
|
+
:output_testing_file => 'testing_file_predictions.txt',
|
40
|
+
:output_tree_errors_file => 'generalization_errors.txt'
|
39
41
|
}
|
40
42
|
|
41
43
|
|
@@ -53,6 +55,7 @@ module Nimbus
|
|
53
55
|
@output_forest_file = File.expand_path(DEFAULTS[:output_forest_file], Dir.pwd)
|
54
56
|
@output_training_file = File.expand_path(DEFAULTS[:output_training_file], Dir.pwd)
|
55
57
|
@output_testing_file = File.expand_path(DEFAULTS[:output_testing_file], Dir.pwd)
|
58
|
+
@output_tree_errors_file = File.expand_path(DEFAULTS[:output_tree_errors_file], Dir.pwd)
|
56
59
|
end
|
57
60
|
|
58
61
|
def tree
|
data/lib/nimbus/forest.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
module Nimbus
|
2
2
|
|
3
3
|
class Forest
|
4
|
-
attr_accessor :size, :trees, :bag, :predictions
|
4
|
+
attr_accessor :size, :trees, :bag, :predictions, :tree_errors
|
5
5
|
attr_accessor :options
|
6
6
|
|
7
7
|
def initialize(config)
|
8
8
|
@trees = []
|
9
|
+
@tree_errors = []
|
9
10
|
@options = config
|
10
11
|
@size = config.forest_size
|
11
12
|
@predictions = {}
|
@@ -20,6 +21,7 @@ module Nimbus
|
|
20
21
|
tree_out_of_bag = oob tree_individuals_bag
|
21
22
|
tree = Tree.new @options.tree
|
22
23
|
@trees << tree.seed(@options.training_set.individuals, tree_individuals_bag, @options.training_set.ids_fenotypes)
|
24
|
+
@tree_errors << tree.generalization_error_from_oob(tree_out_of_bag)
|
23
25
|
acumulate_predictions tree.predictions
|
24
26
|
Nimbus.clear_line!
|
25
27
|
end
|
@@ -30,7 +32,7 @@ module Nimbus
|
|
30
32
|
@predictions = {}
|
31
33
|
prediction_count = trees.size
|
32
34
|
@options.read_testing_data{|individual|
|
33
|
-
individual_prediction=0.0
|
35
|
+
individual_prediction = 0.0
|
34
36
|
trees.each do |t|
|
35
37
|
individual_prediction = (individual_prediction + Nimbus::Tree.traverse(t, individual.snp_list)).round(5)
|
36
38
|
end
|
data/lib/nimbus/tree.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Nimbus
|
2
2
|
|
3
3
|
class Tree
|
4
|
-
attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :structure, :predictions
|
4
|
+
attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :structure, :generalization_error, :predictions
|
5
5
|
attr_accessor :individuals, :id_to_fenotype
|
6
6
|
|
7
7
|
def initialize(options)
|
@@ -17,6 +17,16 @@ module Nimbus
|
|
17
17
|
|
18
18
|
@structure = build_node individuals_sample, Nimbus::LossFunctions.average(individuals_sample, @id_to_fenotype)
|
19
19
|
end
|
20
|
+
|
21
|
+
# Computes this tree's generalization error from its out-of-bag individuals.
#
# oob_ids - ids of the individuals that were left out of this tree's bag.
#
# Returns nil when @structure, @individuals or @id_to_fenotype is not set.
# Otherwise each out-of-bag individual is run through the tree structure, and
# the result of Nimbus::LossFunctions.quadratic_loss over those predictions
# (against the out-of-bag average) is stored in @generalization_error and
# returned.
def generalization_error_from_oob(oob_ids)
  return nil if [@structure, @individuals, @id_to_fenotype].any?(&:nil?)

  oob_mean = Nimbus::LossFunctions.average(oob_ids, @id_to_fenotype)

  # Prediction of this tree for every out-of-bag individual, keyed by id.
  predicted_by_id = {}
  oob_ids.each do |individual_id|
    snps = individuals[individual_id].snp_list
    predicted_by_id[individual_id] = Tree.traverse(@structure, snps)
  end

  @generalization_error = Nimbus::LossFunctions.quadratic_loss(oob_ids, predicted_by_id, oob_mean)
end
|
20
30
|
|
21
31
|
def build_node(individuals_ids, y_hat)
|
22
32
|
# General loss function value for the node
|
@@ -26,7 +36,7 @@ module Nimbus
|
|
26
36
|
|
27
37
|
# Finding the SNP that minimizes loss function
|
28
38
|
snps = snps_random_sample
|
29
|
-
min_loss, min_SNP, split, means
|
39
|
+
min_loss, min_SNP, split, means = node_loss_function, nil, nil, nil
|
30
40
|
|
31
41
|
snps.each do |snp|
|
32
42
|
individuals_split_by_snp_value = split_by_snp_value individuals_ids, snp
|