nimbus 0.6.1 → 0.7
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- data/lib/nimbus/application.rb +12 -0
- data/lib/nimbus/configuration.rb +5 -2
- data/lib/nimbus/forest.rb +4 -2
- data/lib/nimbus/tree.rb +12 -2
- metadata +1 -1
data/lib/nimbus/application.rb
CHANGED
@@ -29,6 +29,7 @@ module Nimbus
|
|
29
29
|
@forest = ::Nimbus::Forest.new @config
|
30
30
|
@forest.grow
|
31
31
|
output_random_forest_file(@forest)
|
32
|
+
output_tree_errors_file(@forest)
|
32
33
|
output_training_file_predictions(@forest)
|
33
34
|
end
|
34
35
|
|
@@ -83,6 +84,17 @@ module Nimbus
|
|
83
84
|
Nimbus.message "*" * 50
|
84
85
|
end
|
85
86
|
|
87
|
+
def output_tree_errors_file(forest)
|
88
|
+
File.open(@config.output_tree_errors_file , 'w') {|f|
|
89
|
+
1.upto(forest.tree_errors.size) do |te|
|
90
|
+
f.write("generalization error for tree ##{te}: #{forest.tree_errors[te-1].round(5)}\n")
|
91
|
+
end
|
92
|
+
}
|
93
|
+
Nimbus.message "* Generalization errors for every tree saved to:"
|
94
|
+
Nimbus.message "* Output tree errors file: #{@config.output_tree_errors_file}"
|
95
|
+
Nimbus.message "*" * 50
|
96
|
+
end
|
97
|
+
|
86
98
|
def output_training_file_predictions(forest)
|
87
99
|
File.open(@config.output_training_file , 'w') {|f|
|
88
100
|
forest.predictions.sort.each{|p|
|
data/lib/nimbus/configuration.rb
CHANGED
@@ -16,7 +16,8 @@ module Nimbus
|
|
16
16
|
:training_set,
|
17
17
|
:output_forest_file,
|
18
18
|
:output_training_file,
|
19
|
-
:output_testing_file
|
19
|
+
:output_testing_file,
|
20
|
+
:output_tree_errors_file
|
20
21
|
)
|
21
22
|
|
22
23
|
DEFAULTS = {
|
@@ -35,7 +36,8 @@ module Nimbus
|
|
35
36
|
|
36
37
|
:output_forest_file => 'random_forest.yml',
|
37
38
|
:output_training_file => 'training_file_predictions.txt',
|
38
|
-
:output_testing_file => 'testing_file_predictions.txt'
|
39
|
+
:output_testing_file => 'testing_file_predictions.txt',
|
40
|
+
:output_tree_errors_file => 'generalization_errors.txt'
|
39
41
|
}
|
40
42
|
|
41
43
|
|
@@ -53,6 +55,7 @@ module Nimbus
|
|
53
55
|
@output_forest_file = File.expand_path(DEFAULTS[:output_forest_file], Dir.pwd)
|
54
56
|
@output_training_file = File.expand_path(DEFAULTS[:output_training_file], Dir.pwd)
|
55
57
|
@output_testing_file = File.expand_path(DEFAULTS[:output_testing_file], Dir.pwd)
|
58
|
+
@output_tree_errors_file = File.expand_path(DEFAULTS[:output_tree_errors_file], Dir.pwd)
|
56
59
|
end
|
57
60
|
|
58
61
|
def tree
|
data/lib/nimbus/forest.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
module Nimbus
|
2
2
|
|
3
3
|
class Forest
|
4
|
-
attr_accessor :size, :trees, :bag, :predictions
|
4
|
+
attr_accessor :size, :trees, :bag, :predictions, :tree_errors
|
5
5
|
attr_accessor :options
|
6
6
|
|
7
7
|
def initialize(config)
|
8
8
|
@trees = []
|
9
|
+
@tree_errors = []
|
9
10
|
@options = config
|
10
11
|
@size = config.forest_size
|
11
12
|
@predictions = {}
|
@@ -20,6 +21,7 @@ module Nimbus
|
|
20
21
|
tree_out_of_bag = oob tree_individuals_bag
|
21
22
|
tree = Tree.new @options.tree
|
22
23
|
@trees << tree.seed(@options.training_set.individuals, tree_individuals_bag, @options.training_set.ids_fenotypes)
|
24
|
+
@tree_errors << tree.generalization_error_from_oob(tree_out_of_bag)
|
23
25
|
acumulate_predictions tree.predictions
|
24
26
|
Nimbus.clear_line!
|
25
27
|
end
|
@@ -30,7 +32,7 @@ module Nimbus
|
|
30
32
|
@predictions = {}
|
31
33
|
prediction_count = trees.size
|
32
34
|
@options.read_testing_data{|individual|
|
33
|
-
individual_prediction=0.0
|
35
|
+
individual_prediction = 0.0
|
34
36
|
trees.each do |t|
|
35
37
|
individual_prediction = (individual_prediction + Nimbus::Tree.traverse(t, individual.snp_list)).round(5)
|
36
38
|
end
|
data/lib/nimbus/tree.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Nimbus
|
2
2
|
|
3
3
|
class Tree
|
4
|
-
attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :structure, :predictions
|
4
|
+
attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :structure, :generalization_error, :predictions
|
5
5
|
attr_accessor :individuals, :id_to_fenotype
|
6
6
|
|
7
7
|
def initialize(options)
|
@@ -17,6 +17,16 @@ module Nimbus
|
|
17
17
|
|
18
18
|
@structure = build_node individuals_sample, Nimbus::LossFunctions.average(individuals_sample, @id_to_fenotype)
|
19
19
|
end
|
20
|
+
|
21
|
+
def generalization_error_from_oob(oob_ids)
|
22
|
+
return nil if (@structure.nil? || @individuals.nil? || @id_to_fenotype.nil?)
|
23
|
+
oob_y_hat = Nimbus::LossFunctions.average(oob_ids, @id_to_fenotype)
|
24
|
+
oob_predictions = {}
|
25
|
+
oob_ids.each do |oobi|
|
26
|
+
oob_predictions[oobi] = Tree.traverse @structure, individuals[oobi].snp_list
|
27
|
+
end
|
28
|
+
@generalization_error = Nimbus::LossFunctions.quadratic_loss oob_ids, oob_predictions, oob_y_hat
|
29
|
+
end
|
20
30
|
|
21
31
|
def build_node(individuals_ids, y_hat)
|
22
32
|
# General loss function value for the node
|
@@ -26,7 +36,7 @@ module Nimbus
|
|
26
36
|
|
27
37
|
# Finding the SNP that minimizes loss function
|
28
38
|
snps = snps_random_sample
|
29
|
-
min_loss, min_SNP, split, means
|
39
|
+
min_loss, min_SNP, split, means = node_loss_function, nil, nil, nil
|
30
40
|
|
31
41
|
snps.each do |snp|
|
32
42
|
individuals_split_by_snp_value = split_by_snp_value individuals_ids, snp
|